Loading src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables-json.py +156 −24 Original line number Diff line number Diff line #!/usr/bin/python3 import os import json import pandas as pd from pandas.io.json import json_normalize import matplotlib.pyplot as plt import numpy as np #### # Helper function def slugify(s): s = str(s).strip().replace(' ', '_') return re.sub(r'(?u)[^-\w.]', '', s) #### # Comparison with Cusparse def cusparse_comparison( df, formats ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) t = np.arange(df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) #### # Comparison with CSR on CPU def csr_comparison( df, formats ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) result.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) #### # Parse input file print( "Parsing input file...." ) Loading @@ -17,16 +97,19 @@ with open('sparse-matrix-benchmark.log') as f: input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) #### # Create multiindex for columns # Get format names - TODO: the first benchmark might not have all of them matrixName = input_df.iloc[0]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] formats = df_matrix.loc[:,'format'] formats = df_matrix.loc[:,'format'].values.tolist() formats = list(dict.fromkeys(formats)) # remove duplicates level1 = [ 'Matrix name', 'rows', 'columns' ] level2 = [ '', '', '' ] level3 = [ '', '', '' ] level4 = [ '', '', '' ] df_data = [[ ' ',' ',' ']] for format in formats: for device in ['CPU','GPU']: Loading @@ -34,13 +117,21 @@ for format in formats: level1.append( format ) level2.append( device ) level3.append( data ) level4.append( '' ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3 ] ) if not format in [ 'cusparse', 'CSR' ]: for speedup in [ 'cusparse', 'CSR CPU']: level1.append( format ) level2.append( 'GPU' ) level3.append( 'speed-up') level4.append( speedup ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3, level4 ] ) frames = [] in_idx = 0 out_idx = 0 max_out_idx = 50 max_out_idx = 10 print( "Converting data..." ) while in_idx < len(input_df.index) and out_idx < max_out_idx: matrixName = input_df.iloc[in_idx]['matrix name'] Loading @@ -54,7 +145,7 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: current_format = row['format'] current_device = row['device'] #print( current_format + " / " + current_device ) aux_df.iloc[0][(current_format,current_device,'bandwidth')] = row['bandwidth'] aux_df.iloc[0][(current_format,current_device,'bandwidth','')] = pd.to_numeric(row['bandwidth'], errors='coerce') #aux_df.iloc[0][(current_format,current_device,'time')] = row['time'] #aux_df.iloc[0][(current_format,current_device,'speed-up')] = row['speedup'] #aux_df.iloc[0][(current_format,current_device,'non-zeros')] = row['non-zeros'] Loading @@ -70,30 +161,71 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: print( "Merging data into one frame..." ) result = pd.concat( frames ) print( "Setting data types..." ) for format in formats: for device in ['CPU','GPU']: #df['eps'] = pd.to_numeric(df['eps'], errors='coerce') print(result[(format,device,'bandwidth')].toList()) result[(format,device,'bandwidth')] = pd.to_numeric( result[(format,device,'bandwidth')], errors='coerce' ) #result[(format,device,'time')].astype('float64') #result[(format,device,'speed-up')].astype('float64') #result[(format,device,'non-zeros')].astype('int64') #result[(format,device,'stddev')].astype('float64') #result[(format,device,'stddev/time')].astype('float64') #result[(format,device,'diff.max')].astype('float64') #result[(format,device,'diff.l2')].astype('float64') if not format in [ 'cusparse', 'CSR' ]: print( 'Adding speed-up for ', format ) format_bdw_list = result[(format,'GPU','bandwidth')] cusparse_bdw_list = result[('cusparse','GPU','bandwidth')] csr_bdw_list = result[('CSR','CPU','bandwidth')] cusparse_speedup_list = [] csr_speedup_list = [] for ( format_bdw, cusparse_bdw, csr_bdw ) in zip( format_bdw_list, cusparse_bdw_list,csr_bdw_list ): try: cusparse_speedup_list.append( format_bdw / cusparse_bdw ) except: cusparse_speedup_list.append('') try: csr_speedup_list.append( format_bdw / csr_bdw ) except: csr_speedup_list.append('') #print( f'**{type(format_bdw)}** -- {type(5.2)}' ) #if type(format_bdw) == "<class 'numpy.float64'>": # print( f'##########{format_bdw / cusparse_bdw}' ) # cusparse_speedup_list.append( format_bdw / cusparse_bdw ) # csr_speedup_list.append( format_bdw / csr_bdw ) #else: # cusparse_speedup_list.append('') # csr_speedup_list.append('') result[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list result[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Generate report = tables and figures #result.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) #for format in formats: # cusparse_bw = result[('cusparse','GPU','bandwidth')].toList() # format_bw = result[(format,'GPU','bandwidth')].toList() # #cusparse_comparison( result, formats ) #csr_comparison( result, formats ) #legacy_formats_comparison( result, formats ) #for format in formats: # result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) #### # Comparison of speed-up w.r.t. Cusparse if not os.path.exists("Cusparse-speed-up"): os.mkdir("Cusparse-speed-up") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of speed-up of {format} compared to Cusparse" ) result['tmp'] = result[(format, 'GPU','bandwidth')] filtered_df=result.dropna(subset=['rows']) filtered_df.to_html( 'tmp.html') break filtered_df.sort_values(by=[(format,'GPU','speed-up','cusparse')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) size = result[(format,'GPU','bandwidth')].size t = np.arange( size ) bar = np.full( size, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[0].plot( t, bar, '-', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, bar, '-', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-speed-up/{format}.pdf") plt.close(fig) Loading
src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables-json.py +156 −24 Original line number Diff line number Diff line #!/usr/bin/python3 import os import json import pandas as pd from pandas.io.json import json_normalize import matplotlib.pyplot as plt import numpy as np #### # Helper function def slugify(s): s = str(s).strip().replace(' ', '_') return re.sub(r'(?u)[^-\w.]', '', s) #### # Comparison with Cusparse def cusparse_comparison( df, formats ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) t = np.arange(df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) #### # Comparison with CSR on CPU def csr_comparison( df, formats ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) result.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) #### # Parse input file print( "Parsing input file...." ) Loading @@ -17,16 +97,19 @@ with open('sparse-matrix-benchmark.log') as f: input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) #### # Create multiindex for columns # Get format names - TODO: the first benchmark might not have all of them matrixName = input_df.iloc[0]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] formats = df_matrix.loc[:,'format'] formats = df_matrix.loc[:,'format'].values.tolist() formats = list(dict.fromkeys(formats)) # remove duplicates level1 = [ 'Matrix name', 'rows', 'columns' ] level2 = [ '', '', '' ] level3 = [ '', '', '' ] level4 = [ '', '', '' ] df_data = [[ ' ',' ',' ']] for format in formats: for device in ['CPU','GPU']: Loading @@ -34,13 +117,21 @@ for format in formats: level1.append( format ) level2.append( device ) level3.append( data ) level4.append( '' ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3 ] ) if not format in [ 'cusparse', 'CSR' ]: for speedup in [ 'cusparse', 'CSR CPU']: level1.append( format ) level2.append( 'GPU' ) level3.append( 'speed-up') level4.append( speedup ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3, level4 ] ) frames = [] in_idx = 0 out_idx = 0 max_out_idx = 50 max_out_idx = 10 print( "Converting data..." ) while in_idx < len(input_df.index) and out_idx < max_out_idx: matrixName = input_df.iloc[in_idx]['matrix name'] Loading @@ -54,7 +145,7 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: current_format = row['format'] current_device = row['device'] #print( current_format + " / " + current_device ) aux_df.iloc[0][(current_format,current_device,'bandwidth')] = row['bandwidth'] aux_df.iloc[0][(current_format,current_device,'bandwidth','')] = pd.to_numeric(row['bandwidth'], errors='coerce') #aux_df.iloc[0][(current_format,current_device,'time')] = row['time'] #aux_df.iloc[0][(current_format,current_device,'speed-up')] = row['speedup'] #aux_df.iloc[0][(current_format,current_device,'non-zeros')] = row['non-zeros'] Loading @@ -70,30 +161,71 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: print( "Merging data into one frame..." ) result = pd.concat( frames ) print( "Setting data types..." ) for format in formats: for device in ['CPU','GPU']: #df['eps'] = pd.to_numeric(df['eps'], errors='coerce') print(result[(format,device,'bandwidth')].toList()) result[(format,device,'bandwidth')] = pd.to_numeric( result[(format,device,'bandwidth')], errors='coerce' ) #result[(format,device,'time')].astype('float64') #result[(format,device,'speed-up')].astype('float64') #result[(format,device,'non-zeros')].astype('int64') #result[(format,device,'stddev')].astype('float64') #result[(format,device,'stddev/time')].astype('float64') #result[(format,device,'diff.max')].astype('float64') #result[(format,device,'diff.l2')].astype('float64') if not format in [ 'cusparse', 'CSR' ]: print( 'Adding speed-up for ', format ) format_bdw_list = result[(format,'GPU','bandwidth')] cusparse_bdw_list = result[('cusparse','GPU','bandwidth')] csr_bdw_list = result[('CSR','CPU','bandwidth')] cusparse_speedup_list = [] csr_speedup_list = [] for ( format_bdw, cusparse_bdw, csr_bdw ) in zip( format_bdw_list, cusparse_bdw_list,csr_bdw_list ): try: cusparse_speedup_list.append( format_bdw / cusparse_bdw ) except: cusparse_speedup_list.append('') try: csr_speedup_list.append( format_bdw / csr_bdw ) except: csr_speedup_list.append('') #print( f'**{type(format_bdw)}** -- {type(5.2)}' ) #if type(format_bdw) == "<class 'numpy.float64'>": # print( f'##########{format_bdw / cusparse_bdw}' ) # cusparse_speedup_list.append( format_bdw / cusparse_bdw ) # csr_speedup_list.append( format_bdw / csr_bdw ) #else: # cusparse_speedup_list.append('') # csr_speedup_list.append('') result[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list result[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Generate report = tables and figures #result.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) #for format in formats: # cusparse_bw = result[('cusparse','GPU','bandwidth')].toList() # format_bw = result[(format,'GPU','bandwidth')].toList() # #cusparse_comparison( result, formats ) #csr_comparison( result, formats ) #legacy_formats_comparison( result, formats ) #for format in formats: # result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) #### # Comparison of speed-up w.r.t. Cusparse if not os.path.exists("Cusparse-speed-up"): os.mkdir("Cusparse-speed-up") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of speed-up of {format} compared to Cusparse" ) result['tmp'] = result[(format, 'GPU','bandwidth')] filtered_df=result.dropna(subset=['rows']) filtered_df.to_html( 'tmp.html') break filtered_df.sort_values(by=[(format,'GPU','speed-up','cusparse')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) size = result[(format,'GPU','bandwidth')].size t = np.arange( size ) bar = np.full( size, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[0].plot( t, bar, '-', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, bar, '-', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-speed-up/{format}.pdf") plt.close(fig)