...
 
Commits (2)
......@@ -27,12 +27,14 @@ struct SpmvBenchmarkResult
using HostVector = Containers::Vector< Real, Devices::Host, Index >;
using BenchmarkVector = Containers::Vector< Real, Device, Index >;
// Two-argument overload: binds this result object to the reference CSR
// output and to the output of the benchmarked format. Both vectors are
// stored as references, so they must outlive this object.
// NOTE(review): this overload gives no initializer to the `nonzeros`
// member (declared `const IndexType` at the end of the struct) - confirm
// whether it is still meant to be used.
SpmvBenchmarkResult( const HostVector& csrResult,
                     const BenchmarkVector& benchmarkResult )
   : csrResult( csrResult ),
     benchmarkResult( benchmarkResult )
{}
// Binds this result object to the reference CSR output, to the output of
// the benchmarked format, and records the number of non-zero elements that
// is later streamed as the first row element.
// The two vectors are stored as references (not copied), so they must
// outlive this object.
SpmvBenchmarkResult( const HostVector& csrResult,
                     const BenchmarkVector& benchmarkResult,
                     const IndexType nonzeros )
   : csrResult( csrResult ),
     benchmarkResult( benchmarkResult ),
     nonzeros( nonzeros )
{}
// Returns the column titles of one benchmark row.
// The leading titles follow the order in which getRowElements() streams
// its values: the non-zeros count first, then time, stddev, stddev/time,
// bandwidth and speedup. A stale second `return` without "non-zeros" used
// to precede this one and made it unreachable, leaving the header one
// column short of the row.
virtual HeaderElements getTableHeader() const override
{
   return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
}
virtual RowElements getRowElements() const override
......@@ -41,7 +43,7 @@ struct SpmvBenchmarkResult
benchmarkResultCopy = benchmarkResult;
auto diff = csrResult - benchmarkResultCopy;
RowElements elements;
elements << time << stddev << stddev/time << bandwidth;
elements << nonzeros << time << stddev << stddev/time << bandwidth;
if( speedup != 0.0 )
elements << speedup;
else elements << "N/A";
......@@ -51,6 +53,7 @@ struct SpmvBenchmarkResult
const HostVector& csrResult;
const BenchmarkVector& benchmarkResult;
const IndexType nonzeros;
};
} //namespace Benchmarks
......
......@@ -158,7 +158,7 @@ benchmarkSpMV( Benchmark& benchmark,
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
//{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) },
{ "matrix format", MatrixInfo< HostMatrix >::getFormat() }
......@@ -181,7 +181,7 @@ benchmarkSpMV( Benchmark& benchmark,
hostMatrix.vectorProduct( hostInVector, hostOutVector );
};
SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
/***
......@@ -199,7 +199,7 @@ benchmarkSpMV( Benchmark& benchmark,
auto spmvCuda = [&]() {
cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
};
SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
#endif
std::cout << std::endl;
......@@ -233,7 +233,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
//
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
//{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "CSR" ) }
......@@ -250,7 +250,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
};
benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost );
SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults );
////
// Perform benchmark on CUDA device with cuSparse as a reference GPU format
......@@ -258,7 +259,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
#ifdef HAVE_CUDA
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
//{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "cuSparse" ) }
......@@ -286,7 +287,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
};
benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
#endif
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );
......
......@@ -60,21 +60,36 @@ df.sort_index(axis=1, inplace=True)
# Drop the CPU "speedup" column of each listed format.
# Entries that are commented out are formats whose column is currently
# not dropped here.
cpu_speedup_formats = (
    'BiEllpack Legacy',
    'BiEllpack',
    'CSR',
    'CSR Legacy',
    #'CSR Legacy Adaptive',
    #'CSR Legacy Light',
    #'CSR Legacy Scalar',
    #'CSR Legacy Stream',
    #'CSR Legacy Vector',
    'ChunkedEllpack Legacy',
    'Ellpack',
    'Ellpack Legacy',
    'SlicedEllpack',
    'SlicedEllpack Legacy',
)
for format_name in cpu_speedup_formats:
    df.drop(columns=(format_name, 'CPU', 'speedup'), axis=1, inplace=True)
df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True )
#df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True )
# Dump the frame to HTML; at this point it does not yet contain the
# speedup columns computed below.
print( "Exporting data frame to log.html..." )
pandas.options.display.float_format = '{:,.4f}'.format
df.to_html("log.html")

print( "Computing speed-up of formats...")
# Add speedup compared to CSR and cuSparse
# Each column holds <format time> / <reference time>, where the reference is
# CSR on the CPU and cuSparse on the GPU, so a value below 1 means the format
# ran faster than the reference.
df["BiEllpack Legacy", "CPU", "CSR speedup"] = df["BiEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"]
df["BiEllpack Legacy", "GPU", "cuSparse speedup"] = df["BiEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["BiEllpack", "CPU", "CSR speedup"] = df["BiEllpack", "CPU", "time"] / df["CSR", "CPU", "time"]
# The target key was misspelled "BiEllpacky", which created a separate
# top-level column instead of filling the BiEllpack/GPU entry.
df["BiEllpack", "GPU", "cuSparse speedup"] = df["BiEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["CSR", "GPU", "cuSparse speedup"] = df["CSR", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["CSR Legacy", "GPU", "cuSparse speedup"] = df["CSR Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Adaptive", "GPU", "cuSparse speedup"] = df["CSR Legacy Adaptive", "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Light", "GPU", "cuSparse speedup"] = df["CSR Legacy Light", "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Scalar", "GPU", "cuSparse speedup"] = df["CSR Legacy Scalar", "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Stream", "GPU", "cuSparse speedup"] = df["CSR Legacy Stream", "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Vector", "GPU", "cuSparse speedup"] = df["CSR Legacy Vector", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["ChunkedEllpack Legacy", "CPU", "CSR speedup"] = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"]
df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["Ellpack Legacy", "CPU", "CSR speedup"] = df["Ellpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"]
......@@ -87,22 +102,25 @@ df["SlicedEllpack", "CPU", "CSR speedup"] = df["SlicedEllpack",
# SlicedEllpack GPU time relative to cuSparse GPU time.
df["SlicedEllpack", "GPU", "cuSparse speedup"] = df["SlicedEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"]
# Add speedup compared to legacy formats
# Each "Legacy speedup" column is <new format time> / <legacy format time>.
# NOTE(review): the first two CSR assignments (against "CSR Legacy") write to
# the same columns as the next two (against "CSR Legacy Scalar"), so only the
# latter values survive - confirm which legacy baseline is intended.
df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy", "GPU", "time"]
df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy", "CPU", "time"]
df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy Scalar", "GPU", "time"]
df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy Scalar", "CPU", "time"]
df["Ellpack", "GPU", "Legacy speedup"] = df["Ellpack", "GPU", "time"] / df["Ellpack Legacy", "GPU", "time"]
df["Ellpack", "CPU", "Legacy speedup"] = df["Ellpack", "CPU", "time"] / df["Ellpack Legacy", "CPU", "time"]
df["SlicedEllpack", "GPU", "Legacy speedup"] = df["SlicedEllpack", "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"]
df["SlicedEllpack", "CPU", "Legacy speedup"] = df["SlicedEllpack", "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"]
df["BiEllpack", "GPU", "Legacy speedup"] = df["BiEllpack", "GPU", "time"] / df["BiEllpack Legacy", "GPU", "time"]
df["BiEllpack", "CPU", "Legacy speedup"] = df["BiEllpack", "CPU", "time"] / df["BiEllpack Legacy", "CPU", "time"]
# Write the frame, now including the speedup columns, to log.html.
# NOTE(review): an identical, commented-out copy of this export follows
# directly below - confirm whether the export here should stay active.
print( "Exporting data frame to log.html..." )
pandas.options.display.float_format = '{:,.4f}'.format
df.to_html("log.html")
#print( "Exporting data frame to log.html..." )
#pandas.options.display.float_format = '{:,.4f}'.format
#df.to_html("log.html")
# extract columns of reference formats on GPU
# Each GPU bandwidth column is copied under a single flat name; those flat
# names are what the sort_values calls use as sort keys.
print( "Preparing data for graph analysis..." )
df['cuSparse-bandwidth']=df['cuSparse','GPU','bandwidth']
df['ellpack-bandwidth']=df['Ellpack','GPU','bandwidth']
df['sliced-ellpack-bandwidth']=df['SlicedEllpack','GPU','bandwidth']
df['bi-ellpack-bandwidth']=df['BiEllpack','GPU','bandwidth']
# sort by cuSparse
# Rows are reordered in place, highest cuSparse bandwidth first.
df.sort_values(by=["cuSparse-bandwidth"],inplace=True,ascending=False)
......@@ -125,6 +143,12 @@ df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False)
# GPU bandwidths of SlicedEllpack and its legacy variant, taken in the
# current row order of df.
sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist()
sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist()

# sort by BiEllpack
# One in-place descending sort is enough; the call used to be issued twice
# in a row. Both lists are read after the sort, so entries at the same index
# belong to the same row.
df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False)
bi_ellpack_gpu_list = df["BiEllpack", "GPU", "bandwidth"].tolist()
bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist()
print( "Writing gnuplot files..." )
cuSparse_file = open( "cusparse.gplt", "w" )
......@@ -155,7 +179,16 @@ for x in sliced_ellpack_gpu_list:
if str( sliced_ellpack_legacy_gpu_list[ i ] ) != "nan":
sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" )
i = i + 1
ellpack_file.close()
sliced_ellpack_file.close()
# Write the BiEllpack and BiEllpack Legacy GPU bandwidths to bi-ellpack.gplt.
# Each output line is "<1-based row position> <BiEllpack> <BiEllpack Legacy>".
# Rows where either value prints as "nan" are skipped, but the position still
# advances so it keeps matching the row's place in the sorted lists.
# NOTE(review): the original indentation was not recoverable; the counter is
# assumed to advance once per row, since it indexes the parallel legacy list.
# The `with` block closes the file even if a write fails.
with open( "bi-ellpack.gplt", "w" ) as bi_ellpack_file:
    rows = zip( bi_ellpack_gpu_list, bi_ellpack_legacy_gpu_list )
    for position, ( bandwidth, legacy_bandwidth ) in enumerate( rows, start=1 ):
        if str( bandwidth ) != "nan" and str( legacy_bandwidth ) != "nan":
            bi_ellpack_file.write( f"{position} {bandwidth} {legacy_bandwidth}\n" )
print( "Generating Gnuplot file..." )
......@@ -179,18 +212,21 @@ gnuplot_file.write( " 'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with
gnuplot_file.write( " 'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
# Emit the gnuplot commands for the ChunkedEllpack-vs-cuSparse plot; all
# series are read from cusparse.gplt, with column 1 as the x value.
# NOTE(review): column 7 is plotted in this span under three different titles
# ('Chunked Ellpack', 'Chunked Ellpack Legacy' and 'BiEllpack'). The
# 'Chunked Ellpack' and 'BiEllpack' lines each have an identical
# commented-out twin directly above them - confirm which lines should be active.
gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" )
gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
#gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
# Same layout for the BiEllpack-vs-cuSparse plot.
gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" )
gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
#gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( " 'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
# Plots of each format against its legacy implementation. Every plot reads
# its own data file: column 2 is the current format, column 3 the legacy one.

# Ellpack vs. Ellpack Legacy
gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )

# SlicedEllpack vs. SlicedEllpack Legacy
# A leftover copy of the Ellpack plot (reading ellpack.gplt, titled 'Ellpack')
# used to be written into this output before the plot below; it is removed so
# the file only contains the SlicedEllpack data.
gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'sliced-ellpack.gplt' using 1:2 title 'SlicedEllpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( " 'sliced-ellpack.gplt' using 1:3 title 'SlicedEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )

# BiEllpack vs. BiEllpack Legacy
gnuplot_file.write( "set output 'bi-ellpack-vs-bi-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'bi-ellpack.gplt' using 1:2 title 'BiEllpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( " 'bi-ellpack.gplt' using 1:3 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.close()
print( "Executing Gnuplot ..." )
......@@ -203,6 +239,7 @@ os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" )
# Run epstopdf on the EPS plots listed here (shell commands; their exit
# status is not checked).
os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" )
os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" )
os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" )
os.system( "epstopdf --autorotate All bi-ellpack-vs-bi-ellpack-legacy.eps" )

# Remove the intermediate gnuplot data file (message typo "temprary" fixed).
print( "Deleting temporary files..." )
os.system( "rm cusparse.gplt" )
......