Loading src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables-json.py +176 −135 Original line number Diff line number Diff line Loading @@ -14,98 +14,17 @@ def slugify(s): return re.sub(r'(?u)[^-\w.]', '', s) #### # Comparison with Cusparse def cusparse_comparison( df, formats ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) t = np.arange(df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) #### # Comparison with CSR on CPU def csr_comparison( df, formats ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) result.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) #### # Parse input file print( "Parsing input file...." ) with open('sparse-matrix-benchmark.log') as f: d = json.load(f) input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) #### # Create multiindex for columns # Get format names - TODO: the first benchmark might not have all of them # Extract all formats def get_formats( input_df ): matrixName = input_df.iloc[0]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] formats = df_matrix.loc[:,'format'].values.tolist() formats = df_matrix.loc[:,'format'].values.tolist() # Get format names - TODO: the first benchmark might not have all of them formats = list(dict.fromkeys(formats)) # remove duplicates return formats #### # Create multiindex for columns def get_multiindex( input_df, formats ): level1 = [ 'Matrix name', 'rows', 'columns' ] level2 = [ '', '', '' ] level3 = [ '', '', '' ] Loading @@ -127,17 +46,22 @@ for format in formats: level4.append( speedup ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3, level4 ] ) frames = [] return multiColumns, df_data #### # Convert input table to better structured one def convert_data_frame( input_df, multicolumns, df_data, max_rows = -1 ): frames = [] in_idx = 0 out_idx = 0 max_out_idx = 10 print( "Converting data..." ) max_out_idx = max_rows if max_out_idx == -1: max_out_idx = len(input_df.index) while in_idx < len(input_df.index) and out_idx < max_out_idx: matrixName = input_df.iloc[in_idx]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] print( out_idx, ":", in_idx, "/", len(input_df.index), ":", matrixName ) aux_df = pd.DataFrame( df_data, columns = multiColumns, index = [out_idx] ) aux_df = pd.DataFrame( df_data, columns = multicolumns, index = [out_idx] ) for index,row in df_matrix.iterrows(): aux_df.iloc[0]['Matrix name'] = row['matrix name'] aux_df.iloc[0]['rows'] = row['rows'] Loading @@ -153,20 +77,21 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: #aux_df.iloc[0][(current_format,current_device,'stddev/time')] = row['stddev/time'] #aux_df.iloc[0][(current_format,current_device,'diff.max')] = row['CSR Diff.Max'] #aux_df.iloc[0][(current_format,current_device,'diff.l2')] = row['CSR Diff.L2'] frames.append( aux_df ) out_idx = out_idx + 1 in_idx = in_idx + len(df_matrix.index) print( "Merging data into one frame..." ) result = pd.concat( frames ) return result #### # Compute speed-up of particular formats compared to Cusparse on GPU and CSR on CPU def compute_speedup( df, formats ): for format in formats: if not format in [ 'cusparse', 'CSR' ]: print( 'Adding speed-up for ', format ) format_bdw_list = result[(format,'GPU','bandwidth')] cusparse_bdw_list = result[('cusparse','GPU','bandwidth')] csr_bdw_list = result[('CSR','CPU','bandwidth')] format_bdw_list = df[(format,'GPU','bandwidth')] cusparse_bdw_list = df[('cusparse','GPU','bandwidth')] csr_bdw_list = df[('CSR','CPU','bandwidth')] cusparse_speedup_list = [] csr_speedup_list = [] for ( format_bdw, cusparse_bdw, csr_bdw ) in zip( format_bdw_list, cusparse_bdw_list,csr_bdw_list ): Loading @@ -186,46 +111,162 @@ for format in formats: #else: # cusparse_speedup_list.append('') # csr_speedup_list.append('') df[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list df[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list result[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list result[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Comparison with Cusparse def cusparse_comparison( df, formats, head_size=10 ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) filtered_df = df.dropna( subset=[(format,'GPU','bandwidth','')] ) t = np.arange(filtered_df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, filtered_df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, filtered_df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, filtered_df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Cusparse-bw/{format}-head.html" ) #### # Generate report = tables and figures # Comparison with CSR on CPU def csr_comparison( df, formats, head_size=10 ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"CSR-bw/{format}-head.html" ) #cusparse_comparison( result, formats ) #csr_comparison( result, formats ) #legacy_formats_comparison( result, formats ) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats, head_size=10 ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) df.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(df[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, df[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Legacy-bw/{format}-head.html" ) #### # Comparison of speed-up w.r.t. Cusparse def cusparse_speedup_comparison( df, formats, head_size=10 ): if not os.path.exists("Cusparse-speed-up"): os.mkdir("Cusparse-speed-up") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of speed-up of {format} compared to Cusparse" ) result['tmp'] = result[(format, 'GPU','bandwidth')] filtered_df=result.dropna(subset=['rows']) filtered_df.to_html( 'tmp.html') break df['tmp'] = df[(format, 'GPU','bandwidth')] filtered_df=df.dropna(subset=[('tmp','','','')]) filtered_df.sort_values(by=[(format,'GPU','speed-up','cusparse')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) size = result[(format,'GPU','bandwidth')].size size = len(filtered_df[(format,'GPU','speed-up','cusparse')].index) t = np.arange( size ) bar = np.full( size, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[0].plot( t, bar, '-', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, bar, '-', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-speed-up/{format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Cusparse-speed-up/{format}-head.html" ) #### # Parse input file print( "Parsing input file...." ) with open('sparse-matrix-benchmark.log') as f: d = json.load(f) input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) formats = get_formats( input_df ) multicolumns, df_data = get_multiindex( input_df, formats ) print( "Converting data..." ) result = convert_data_frame( input_df, multicolumns, df_data, 200 ) compute_speedup( result, formats ) print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Generate report = tables and figures head_size = 10 cusparse_comparison( result, formats, head_size ) csr_comparison( result, formats, head_size ) legacy_formats_comparison( result, formats, head_size ) cusparse_speedup_comparison( result, formats, head_size ) Loading
src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables-json.py +176 −135 Original line number Diff line number Diff line Loading @@ -14,98 +14,17 @@ def slugify(s): return re.sub(r'(?u)[^-\w.]', '', s) #### # Comparison with Cusparse def cusparse_comparison( df, formats ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) t = np.arange(df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) #### # Comparison with CSR on CPU def csr_comparison( df, formats ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) result.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) #### # Parse input file print( "Parsing input file...." ) with open('sparse-matrix-benchmark.log') as f: d = json.load(f) input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) #### # Create multiindex for columns # Get format names - TODO: the first benchmark might not have all of them # Extract all formats def get_formats( input_df ): matrixName = input_df.iloc[0]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] formats = df_matrix.loc[:,'format'].values.tolist() formats = df_matrix.loc[:,'format'].values.tolist() # Get format names - TODO: the first benchmark might not have all of them formats = list(dict.fromkeys(formats)) # remove duplicates return formats #### # Create multiindex for columns def get_multiindex( input_df, formats ): level1 = [ 'Matrix name', 'rows', 'columns' ] level2 = [ '', '', '' ] level3 = [ '', '', '' ] Loading @@ -127,17 +46,22 @@ for format in formats: level4.append( speedup ) df_data[ 0 ].append( ' ' ) multiColumns = pd.MultiIndex.from_arrays([ level1, level2, level3, level4 ] ) frames = [] return multiColumns, df_data #### # Convert input table to better structured one def convert_data_frame( input_df, multicolumns, df_data, max_rows = -1 ): frames = [] in_idx = 0 out_idx = 0 max_out_idx = 10 print( "Converting data..." ) max_out_idx = max_rows if max_out_idx == -1: max_out_idx = len(input_df.index) while in_idx < len(input_df.index) and out_idx < max_out_idx: matrixName = input_df.iloc[in_idx]['matrix name'] df_matrix = input_df.loc[input_df['matrix name'] == matrixName] print( out_idx, ":", in_idx, "/", len(input_df.index), ":", matrixName ) aux_df = pd.DataFrame( df_data, columns = multiColumns, index = [out_idx] ) aux_df = pd.DataFrame( df_data, columns = multicolumns, index = [out_idx] ) for index,row in df_matrix.iterrows(): aux_df.iloc[0]['Matrix name'] = row['matrix name'] aux_df.iloc[0]['rows'] = row['rows'] Loading @@ -153,20 +77,21 @@ while in_idx < len(input_df.index) and out_idx < max_out_idx: #aux_df.iloc[0][(current_format,current_device,'stddev/time')] = row['stddev/time'] #aux_df.iloc[0][(current_format,current_device,'diff.max')] = row['CSR Diff.Max'] #aux_df.iloc[0][(current_format,current_device,'diff.l2')] = row['CSR Diff.L2'] frames.append( aux_df ) out_idx = out_idx + 1 in_idx = in_idx + len(df_matrix.index) print( "Merging data into one frame..." ) result = pd.concat( frames ) return result #### # Compute speed-up of particular formats compared to Cusparse on GPU and CSR on CPU def compute_speedup( df, formats ): for format in formats: if not format in [ 'cusparse', 'CSR' ]: print( 'Adding speed-up for ', format ) format_bdw_list = result[(format,'GPU','bandwidth')] cusparse_bdw_list = result[('cusparse','GPU','bandwidth')] csr_bdw_list = result[('CSR','CPU','bandwidth')] format_bdw_list = df[(format,'GPU','bandwidth')] cusparse_bdw_list = df[('cusparse','GPU','bandwidth')] csr_bdw_list = df[('CSR','CPU','bandwidth')] cusparse_speedup_list = [] csr_speedup_list = [] for ( format_bdw, cusparse_bdw, csr_bdw ) in zip( format_bdw_list, cusparse_bdw_list,csr_bdw_list ): Loading @@ -186,46 +111,162 @@ for format in formats: #else: # cusparse_speedup_list.append('') # csr_speedup_list.append('') df[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list df[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list result[(format,'GPU','speed-up','cusparse')] = cusparse_speedup_list result[(format,'GPU','speed-up','CSR CPU')] = csr_speedup_list print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Comparison with Cusparse def cusparse_comparison( df, formats, head_size=10 ): if not os.path.exists("Cusparse-bw"): os.mkdir("Cusparse-bw") df.sort_values(by=[('cusparse','GPU','bandwidth')],inplace=True,ascending=False) for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and Cusparse" ) filtered_df = df.dropna( subset=[(format,'GPU','bandwidth','')] ) t = np.arange(filtered_df[(format,'GPU','bandwidth')].size ) fig, axs = plt.subplots( 2, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, filtered_df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[0].set_ylabel( 'Bandwidth in GB/sec' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, filtered_df[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, filtered_df[('cusparse','GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( 'Matrix ID - sorted w.r.t. Cusparse' ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-bw/{format}.pdf" ) plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Cusparse-bw/{format}-head.html" ) #### # Generate report = tables and figures # Comparison with CSR on CPU def csr_comparison( df, formats, head_size=10 ): if not os.path.exists("CSR-bw"): os.mkdir("CSR-bw") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of {format} and CSR on CPU" ) result.sort_values(by=[(format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(result[(format,'GPU','bandwidth')].size ) axs[0].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, result[('CSR','CPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ format, 'CSR on CPU' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"CSR-bw/{format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"CSR-bw/{format}-head.html" ) #cusparse_comparison( result, formats ) #csr_comparison( result, formats ) #legacy_formats_comparison( result, formats ) #### # Comparison of Legacy formats def legacy_formats_comparison( df, formats, head_size=10 ): if not os.path.exists("Legacy-bw"): os.mkdir("Legacy-bw") for ref_format, legacy_format in [ ('Ellpack', 'Ellpack Legacy'), ('SlicedEllpack', 'SlicedEllpack Legacy'), ('ChunkedEllpack', 'ChunkedEllpack Legacy'), ('BiEllpack', 'BiEllpack Legacy'), ('CSR< Adaptive >', 'CSR Legacy Adaptive'), ('CSR< Scalar >', 'CSR Legacy Scalar'), ('CSR< Vector >', 'CSR Legacy Vector') ]: if ref_format in formats and legacy_format in formats: print( f"Writing comparison of {ref_format} and {legacy_format}" ) df.sort_values(by=[(ref_format,'GPU','bandwidth')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) t = np.arange(df[(ref_format,'GPU','bandwidth')].size ) axs[0].plot( t, df[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].plot( t, df[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[0].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, df[(ref_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].plot( t, df[(legacy_format,'GPU','bandwidth')], '-o', ms=1, lw=1 ) axs[1].legend( [ ref_format, legacy_format ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {ref_format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Legacy-bw/{ref_format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Legacy-bw/{format}-head.html" ) #### # Comparison of speed-up w.r.t. Cusparse def cusparse_speedup_comparison( df, formats, head_size=10 ): if not os.path.exists("Cusparse-speed-up"): os.mkdir("Cusparse-speed-up") for format in formats: if not format in ['cusparse','CSR']: print( f"Writing comparison of speed-up of {format} compared to Cusparse" ) result['tmp'] = result[(format, 'GPU','bandwidth')] filtered_df=result.dropna(subset=['rows']) filtered_df.to_html( 'tmp.html') break df['tmp'] = df[(format, 'GPU','bandwidth')] filtered_df=df.dropna(subset=[('tmp','','','')]) filtered_df.sort_values(by=[(format,'GPU','speed-up','cusparse')],inplace=True,ascending=False) fig, axs = plt.subplots( 2, 1 ) size = result[(format,'GPU','bandwidth')].size size = len(filtered_df[(format,'GPU','speed-up','cusparse')].index) t = np.arange( size ) bar = np.full( size, 1 ) axs[0].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[0].plot( t, bar, '-', ms=1, lw=1 ) axs[0].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_yscale( 'log' ) axs[1].plot( t, result[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, filtered_df[(format,'GPU','speed-up','cusparse')], '-o', ms=1, lw=1 ) axs[1].plot( t, bar, '-', ms=1, lw=1 ) axs[1].legend( [ format, 'Cusparse' ], loc='upper right' ) axs[1].set_xlabel( f"Matrix ID - sorted w.r.t. {format}" ) axs[1].set_ylabel( 'Bandwidth in GB/sec' ) plt.savefig( f"Cusparse-speed-up/{format}.pdf") plt.close(fig) head_df = filtered_df.head( head_size ) for f in formats: if not f in ['cusparse','CSR',format]: print( f"Droping {f}..." ) head_df.drop( labels=f, axis='columns', level=0, inplace=True ) head_df.to_html( f"Cusparse-speed-up/{format}-head.html" ) #### # Parse input file print( "Parsing input file...." ) with open('sparse-matrix-benchmark.log') as f: d = json.load(f) input_df = json_normalize( d, record_path=['results'] ) #input_df.to_html( "orig-pandas.html" ) formats = get_formats( input_df ) multicolumns, df_data = get_multiindex( input_df, formats ) print( "Converting data..." ) result = convert_data_frame( input_df, multicolumns, df_data, 200 ) compute_speedup( result, formats ) print( "Writting to HTML file..." ) result.to_html( 'output.html' ) result.replace( to_replace=' ',value=np.nan,inplace=True) #### # Generate report = tables and figures head_size = 10 cusparse_comparison( result, formats, head_size ) csr_comparison( result, formats, head_size ) legacy_formats_comparison( result, formats, head_size ) cusparse_speedup_comparison( result, formats, head_size )