Loading src/Benchmarks/SpMV/SpmvBenchmarkResult.h +7 −4 Original line number Diff line number Diff line Loading @@ -27,12 +27,14 @@ struct SpmvBenchmarkResult using HostVector = Containers::Vector< Real, Devices::Host, Index >; using BenchmarkVector = Containers::Vector< Real, Device, Index >; SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ){}; SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult, const IndexType nonzeros ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements( {"time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); } virtual RowElements getRowElements() const override Loading @@ -41,7 +43,7 @@ struct SpmvBenchmarkResult benchmarkResultCopy = benchmarkResult; auto diff = csrResult - benchmarkResultCopy; RowElements elements; elements << time << stddev << stddev/time << bandwidth; elements << nonzeros << time << stddev << stddev/time << bandwidth; if( speedup != 0.0 ) elements << speedup; else elements << "N/A"; Loading @@ -51,6 +53,7 @@ struct SpmvBenchmarkResult const HostVector& csrResult; const BenchmarkVector& benchmarkResult; const IndexType nonzeros; }; } //namespace Benchmarks Loading src/Benchmarks/SpMV/spmv-legacy.h +9 −7 Original line number Diff line number Diff line Loading @@ -158,7 +158,7 @@ benchmarkSpMV( Benchmark& benchmark, benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) }, //{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", MatrixInfo< HostMatrix >::getFormat() } Loading @@ -181,7 +181,7 @@ benchmarkSpMV( Benchmark& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); /*** Loading @@ -199,7 +199,7 @@ benchmarkSpMV( Benchmark& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading Loading @@ -233,7 +233,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR" ) } Loading @@ -250,7 +250,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, csrHostMatrix.vectorProduct( hostInVector, hostOutVector ); }; benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost ); SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults ); //// // Perform benchmark on CUDA device with cuSparse as a reference GPU format Loading @@ -258,7 +259,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, #ifdef HAVE_CUDA benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "cuSparse" ) } Loading Loading @@ -286,7 +287,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector ); }; benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse ); SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults ); #endif benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR ); Loading src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +50 −13 Original line number Diff line number Diff line Loading @@ -60,21 +60,36 @@ df.sort_index(axis=1, inplace=True) # Drop CPU speedup df.drop(columns=('BiEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('BiEllpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('CSR', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('CSR Legacy', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Adaptive', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Light', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Scalar', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Stream', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Vector', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('ChunkedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('Ellpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('Ellpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('SlicedEllpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('SlicedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True ) #df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True ) print( "Exporting data frame to log.html..." ) pandas.options.display.float_format = '{:,.4f}'.format df.to_html("log.html") print( "Computing speed-up of formats...") # Add speedup compared to CSR and cuSparse df["BiEllpack Legacy", "CPU", "CSR speedup"] = df["BiEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] df["BiEllpack Legacy", "GPU", "cuSparse speedup"] = df["BiEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["BiEllpack", "CPU", "CSR speedup"] = df["BiEllpack", "CPU", "time"] / df["CSR", "CPU", "time"] df["BiEllpacky", "GPU", "cuSparse speedup"] = df["BiEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["CSR", "GPU", "cuSparse speedup"] = df["CSR", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["CSR Legacy", "GPU", "cuSparse speedup"] = df["CSR Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Adaptive", "GPU", "cuSparse speedup"] = df["CSR Legacy Adaptive", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Light", "GPU", "cuSparse speedup"] = df["CSR Legacy Light", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Scalar", "GPU", "cuSparse speedup"] = df["CSR Legacy Scalar", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Stream", "GPU", "cuSparse speedup"] = df["CSR Legacy Stream", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Vector", "GPU", "cuSparse speedup"] = df["CSR Legacy Vector", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["ChunkedEllpack Legacy", "CPU", "CSR speedup"] = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["Ellpack Legacy", "CPU", "CSR speedup"] = df["Ellpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] Loading @@ -87,22 +102,25 @@ df["SlicedEllpack", "CPU", "CSR speedup"] = df["SlicedEllpack", df["SlicedEllpack", "GPU", "cuSparse speedup"] = df["SlicedEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] # Add speedup compared to legacy formats df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy", "GPU", "time"] df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy", "CPU", "time"] df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy Scalar", "GPU", "time"] df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy Scalar", "CPU", "time"] df["Ellpack", "GPU", "Legacy speedup"] = df["Ellpack", "GPU", "time"] / df["Ellpack Legacy", "GPU", "time"] df["Ellpack", "CPU", "Legacy speedup"] = df["Ellpack", "CPU", "time"] / df["Ellpack Legacy", "CPU", "time"] df["SlicedEllpack", "GPU", "Legacy speedup"] = df["SlicedEllpack", "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"] df["SlicedEllpack", "CPU", "Legacy speedup"] = df["SlicedEllpack", "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"] df["BiEllpack", "GPU", "Legacy speedup"] = df["BiEllpack", "GPU", "time"] / df["BiEllpack Legacy", "GPU", "time"] df["BiEllpack", "CPU", "Legacy speedup"] = df["BiEllpack", "CPU", "time"] / df["BiEllpack Legacy", "CPU", "time"] print( "Exporting data frame to log.html..." ) pandas.options.display.float_format = '{:,.4f}'.format df.to_html("log.html") #print( "Exporting data frame to log.html..." ) #pandas.options.display.float_format = '{:,.4f}'.format #df.to_html("log.html") # extract columns of reference formats on GPU print( "Preparing data for graph analysis..." ) df['cuSparse-bandwidth']=df['cuSparse','GPU','bandwidth'] df['ellpack-bandwidth']=df['Ellpack','GPU','bandwidth'] df['sliced-ellpack-bandwidth']=df['SlicedEllpack','GPU','bandwidth'] df['bi-ellpack-bandwidth']=df['BiEllpack','GPU','bandwidth'] # sort by cuSparse df.sort_values(by=["cuSparse-bandwidth"],inplace=True,ascending=False) Loading @@ -125,6 +143,12 @@ df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False) sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist(); sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist(); # sort by BiEllpack df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False) df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False) bi_ellpack_gpu_list = df["BiEllpack", "GPU", "bandwidth"].tolist(); bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist(); print( "Writing gnuplot files..." ) cuSparse_file = open( "cusparse.gplt", "w" ) Loading Loading @@ -155,7 +179,16 @@ for x in sliced_ellpack_gpu_list: if str( sliced_ellpack_legacy_gpu_list[ i ] ) != "nan": sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" ) i = i + 1 ellpack_file.close() sliced_ellpack_file.close() bi_ellpack_file = open( "bi-ellpack.gplt", "w" ) i = 0; for x in bi_ellpack_gpu_list: if str( x ) != "nan": if str( bi_ellpack_legacy_gpu_list[ i ] ) != "nan": bi_ellpack_file.write( f"{i+1} {x} {bi_ellpack_legacy_gpu_list[ i ]}\n" ) i = i + 1 bi_ellpack_file.close() print( "Generating Gnuplot file..." ) Loading @@ -179,18 +212,21 @@ gnuplot_file.write( " 'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with gnuplot_file.write( " 'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" ) gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) #gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" ) gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) #gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "plot 'sliced-ellpack.gplt' using 1:2 title 'SlicedEllpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'sliced-ellpack.gplt' using 1:3 title 'SlicedEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'bi-ellpack-vs-bi-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'bi-ellpack.gplt' using 1:2 title 'BiEllpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'bi-ellpack.gplt' using 1:3 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.close() print( "Executing Gnuplot ..." ) Loading @@ -203,6 +239,7 @@ os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" ) os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" ) os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" ) os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" ) os.system( "epstopdf --autorotate All bi-ellpack-vs-bi-ellpack-legacy.eps" ) print( "Deleting temprary files..." ) os.system( "rm cusparse.gplt" ) Loading Loading
src/Benchmarks/SpMV/SpmvBenchmarkResult.h +7 −4 Original line number Diff line number Diff line Loading @@ -27,12 +27,14 @@ struct SpmvBenchmarkResult using HostVector = Containers::Vector< Real, Devices::Host, Index >; using BenchmarkVector = Containers::Vector< Real, Device, Index >; SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ){}; SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult, const IndexType nonzeros ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements( {"time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); } virtual RowElements getRowElements() const override Loading @@ -41,7 +43,7 @@ struct SpmvBenchmarkResult benchmarkResultCopy = benchmarkResult; auto diff = csrResult - benchmarkResultCopy; RowElements elements; elements << time << stddev << stddev/time << bandwidth; elements << nonzeros << time << stddev << stddev/time << bandwidth; if( speedup != 0.0 ) elements << speedup; else elements << "N/A"; Loading @@ -51,6 +53,7 @@ struct SpmvBenchmarkResult const HostVector& csrResult; const BenchmarkVector& benchmarkResult; const IndexType nonzeros; }; } //namespace Benchmarks Loading
src/Benchmarks/SpMV/spmv-legacy.h +9 −7 Original line number Diff line number Diff line Loading @@ -158,7 +158,7 @@ benchmarkSpMV( Benchmark& benchmark, benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) }, //{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", MatrixInfo< HostMatrix >::getFormat() } Loading @@ -181,7 +181,7 @@ benchmarkSpMV( Benchmark& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); /*** Loading @@ -199,7 +199,7 @@ benchmarkSpMV( Benchmark& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading Loading @@ -233,7 +233,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR" ) } Loading @@ -250,7 +250,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, csrHostMatrix.vectorProduct( hostInVector, hostOutVector ); }; benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost ); SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults ); //// // Perform benchmark on CUDA device with cuSparse as a reference GPU format Loading @@ -258,7 +259,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, #ifdef HAVE_CUDA benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "cuSparse" ) } Loading Loading @@ -286,7 +287,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector ); }; benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse ); SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults ); #endif benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR ); Loading
src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +50 −13 Original line number Diff line number Diff line Loading @@ -60,21 +60,36 @@ df.sort_index(axis=1, inplace=True) # Drop CPU speedup df.drop(columns=('BiEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('BiEllpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('CSR', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('CSR Legacy', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Adaptive', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Light', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Scalar', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Stream', 'CPU','speedup'), axis=1, inplace=True ) #df.drop(columns=('CSR Legacy Vector', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('ChunkedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('Ellpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('Ellpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('SlicedEllpack', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('SlicedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True ) #df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True ) print( "Exporting data frame to log.html..." ) pandas.options.display.float_format = '{:,.4f}'.format df.to_html("log.html") print( "Computing speed-up of formats...") # Add speedup compared to CSR and cuSparse df["BiEllpack Legacy", "CPU", "CSR speedup"] = df["BiEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] df["BiEllpack Legacy", "GPU", "cuSparse speedup"] = df["BiEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["BiEllpack", "CPU", "CSR speedup"] = df["BiEllpack", "CPU", "time"] / df["CSR", "CPU", "time"] df["BiEllpacky", "GPU", "cuSparse speedup"] = df["BiEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["CSR", "GPU", "cuSparse speedup"] = df["CSR", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["CSR Legacy", "GPU", "cuSparse speedup"] = df["CSR Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Adaptive", "GPU", "cuSparse speedup"] = df["CSR Legacy Adaptive", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Light", "GPU", "cuSparse speedup"] = df["CSR Legacy Light", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Scalar", "GPU", "cuSparse speedup"] = df["CSR Legacy Scalar", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Stream", "GPU", "cuSparse speedup"] = df["CSR Legacy Stream", "GPU", "time"] / df["cuSparse", "GPU", "time"] #df["CSR Legacy Vector", "GPU", "cuSparse speedup"] = df["CSR Legacy Vector", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["ChunkedEllpack Legacy", "CPU", "CSR speedup"] = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] df["Ellpack Legacy", "CPU", "CSR speedup"] = df["Ellpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] Loading @@ -87,22 +102,25 @@ df["SlicedEllpack", "CPU", "CSR speedup"] = df["SlicedEllpack", df["SlicedEllpack", "GPU", "cuSparse speedup"] = df["SlicedEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] # Add speedup compared to legacy formats df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy", "GPU", "time"] df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy", "CPU", "time"] df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy Scalar", "GPU", "time"] df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy Scalar", "CPU", "time"] df["Ellpack", "GPU", "Legacy speedup"] = df["Ellpack", "GPU", "time"] / df["Ellpack Legacy", "GPU", "time"] df["Ellpack", "CPU", "Legacy speedup"] = df["Ellpack", "CPU", "time"] / df["Ellpack Legacy", "CPU", "time"] df["SlicedEllpack", "GPU", "Legacy speedup"] = df["SlicedEllpack", "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"] df["SlicedEllpack", "CPU", "Legacy speedup"] = df["SlicedEllpack", "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"] df["BiEllpack", "GPU", "Legacy speedup"] = df["BiEllpack", "GPU", "time"] / df["BiEllpack Legacy", "GPU", "time"] df["BiEllpack", "CPU", "Legacy speedup"] = df["BiEllpack", "CPU", "time"] / df["BiEllpack Legacy", "CPU", "time"] print( "Exporting data frame to log.html..." ) pandas.options.display.float_format = '{:,.4f}'.format df.to_html("log.html") #print( "Exporting data frame to log.html..." ) #pandas.options.display.float_format = '{:,.4f}'.format #df.to_html("log.html") # extract columns of reference formats on GPU print( "Preparing data for graph analysis..." ) df['cuSparse-bandwidth']=df['cuSparse','GPU','bandwidth'] df['ellpack-bandwidth']=df['Ellpack','GPU','bandwidth'] df['sliced-ellpack-bandwidth']=df['SlicedEllpack','GPU','bandwidth'] df['bi-ellpack-bandwidth']=df['BiEllpack','GPU','bandwidth'] # sort by cuSparse df.sort_values(by=["cuSparse-bandwidth"],inplace=True,ascending=False) Loading @@ -125,6 +143,12 @@ df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False) sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist(); sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist(); # sort by BiEllpack df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False) df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False) bi_ellpack_gpu_list = df["BiEllpack", "GPU", "bandwidth"].tolist(); bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist(); print( "Writing gnuplot files..." ) cuSparse_file = open( "cusparse.gplt", "w" ) Loading Loading @@ -155,7 +179,16 @@ for x in sliced_ellpack_gpu_list: if str( sliced_ellpack_legacy_gpu_list[ i ] ) != "nan": sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" ) i = i + 1 ellpack_file.close() sliced_ellpack_file.close() bi_ellpack_file = open( "bi-ellpack.gplt", "w" ) i = 0; for x in bi_ellpack_gpu_list: if str( x ) != "nan": if str( bi_ellpack_legacy_gpu_list[ i ] ) != "nan": bi_ellpack_file.write( f"{i+1} {x} {bi_ellpack_legacy_gpu_list[ i ]}\n" ) i = i + 1 bi_ellpack_file.close() print( "Generating Gnuplot file..." ) Loading @@ -179,18 +212,21 @@ gnuplot_file.write( " 'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with gnuplot_file.write( " 'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" ) gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) #gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" ) gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) #gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" ) gnuplot_file.write( " 'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "plot 'sliced-ellpack.gplt' using 1:2 title 'SlicedEllpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'sliced-ellpack.gplt' using 1:3 title 'SlicedEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.write( "set output 'bi-ellpack-vs-bi-ellpack-legacy.eps'\n" ) gnuplot_file.write( "plot 'bi-ellpack.gplt' using 1:2 title 'BiEllpack' with lines linewidth 2 lt rgb 'red', \\\n" ) gnuplot_file.write( " 'bi-ellpack.gplt' using 1:3 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) gnuplot_file.close() print( "Executing Gnuplot ..." ) Loading @@ -203,6 +239,7 @@ os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" ) os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" ) os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" ) os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" ) os.system( "epstopdf --autorotate All bi-ellpack-vs-bi-ellpack-legacy.eps" ) print( "Deleting temprary files..." ) os.system( "rm cusparse.gplt" ) Loading