Commit 0081833b authored by Tomáš Oberhuber
Browse files

Merge branch 'TO/matrices' into 'develop'

Fixed SpMV benchmark results processing.

See merge request !66
parents 3c945fad 8683177d
Loading
Loading
Loading
Loading
+7 −4
Original line number Diff line number Diff line
@@ -27,12 +27,14 @@ struct SpmvBenchmarkResult
   using HostVector = Containers::Vector< Real, Devices::Host, Index >;
   using BenchmarkVector = Containers::Vector< Real, Device, Index >;

   SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult )
   : csrResult( csrResult ), benchmarkResult( benchmarkResult ){};
   SpmvBenchmarkResult( const HostVector& csrResult,
                        const BenchmarkVector& benchmarkResult,
                        const IndexType nonzeros )
   : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};

   virtual HeaderElements getTableHeader() const override
   {
      return HeaderElements( {"time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
      return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
   }

   virtual RowElements getRowElements() const override
@@ -41,7 +43,7 @@ struct SpmvBenchmarkResult
      benchmarkResultCopy = benchmarkResult;
      auto diff = csrResult - benchmarkResultCopy;
      RowElements elements;
      elements << time << stddev << stddev/time << bandwidth;
      elements << nonzeros << time << stddev << stddev/time << bandwidth;
      if( speedup != 0.0 )
         elements << speedup;
      else elements << "N/A";
@@ -51,6 +53,7 @@ struct SpmvBenchmarkResult

   const HostVector& csrResult;
   const BenchmarkVector& benchmarkResult;
   const IndexType nonzeros;
};
   
} //namespace Benchmarks
+9 −7
Original line number Diff line number Diff line
@@ -158,7 +158,7 @@ benchmarkSpMV( Benchmark& benchmark,

   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
         //{ "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
         { "rows", convertToString( hostMatrix.getRows() ) },
         { "columns", convertToString( hostMatrix.getColumns() ) },
         { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
@@ -181,7 +181,7 @@ benchmarkSpMV( Benchmark& benchmark,
      hostMatrix.vectorProduct( hostInVector, hostOutVector );

   };
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );

   /***
@@ -199,7 +199,7 @@ benchmarkSpMV( Benchmark& benchmark,
   auto spmvCuda = [&]() {
      cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
   };
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
 #endif
    std::cout << std::endl;
@@ -233,7 +233,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
   //
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
         //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
         { "rows", convertToString( csrHostMatrix.getRows() ) },
         { "columns", convertToString( csrHostMatrix.getColumns() ) },
         { "matrix format", String( "CSR" ) }
@@ -250,7 +250,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
       csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
   };

   benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost );
   SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults );

   ////
   // Perform benchmark on CUDA device with cuSparse as a reference GPU format
@@ -258,7 +259,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
#ifdef HAVE_CUDA
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
         { "matrix name", convertToString( inputFileName ) },
         { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
         //{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
         { "rows", convertToString( csrHostMatrix.getRows() ) },
         { "columns", convertToString( csrHostMatrix.getColumns() ) },
         { "matrix format", String( "cuSparse" ) }
@@ -286,7 +287,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
       cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
   };

   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
   SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
#endif

   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar    >( benchmark, hostOutVector, inputFileName, verboseMR );
+50 −13
Original line number Diff line number Diff line
@@ -60,21 +60,36 @@ df.sort_index(axis=1, inplace=True)

# Drop CPU speedup
df.drop(columns=('BiEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('BiEllpack', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('CSR', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('CSR Legacy', 'CPU','speedup'), axis=1, inplace=True )
#df.drop(columns=('CSR Legacy Adaptive', 'CPU','speedup'), axis=1, inplace=True )
#df.drop(columns=('CSR Legacy Light', 'CPU','speedup'), axis=1, inplace=True )
#df.drop(columns=('CSR Legacy Scalar', 'CPU','speedup'), axis=1, inplace=True )
#df.drop(columns=('CSR Legacy Stream', 'CPU','speedup'), axis=1, inplace=True )
#df.drop(columns=('CSR Legacy Vector', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('ChunkedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('Ellpack', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('Ellpack Legacy', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('SlicedEllpack', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('SlicedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True )
df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True )
#df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True )

print( "Exporting data frame to log.html..." )
pandas.options.display.float_format = '{:,.4f}'.format
df.to_html("log.html")

print( "Computing speed-up of formats...")
# Add speedup compared to CSR and cuSparse
df["BiEllpack Legacy",      "CPU", "CSR speedup"]      = df["BiEllpack Legacy",      "CPU", "time"] / df["CSR",      "CPU", "time"]
df["BiEllpack Legacy",      "GPU", "cuSparse speedup"] = df["BiEllpack Legacy",      "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["BiEllpack",             "CPU", "CSR speedup"]      = df["BiEllpack",             "CPU", "time"] / df["CSR",      "CPU", "time"]
df["BiEllpacky",            "GPU", "cuSparse speedup"] = df["BiEllpack",             "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["CSR",                   "GPU", "cuSparse speedup"] = df["CSR",                   "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["CSR Legacy",            "GPU", "cuSparse speedup"] = df["CSR Legacy",            "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Adaptive",   "GPU", "cuSparse speedup"] = df["CSR Legacy Adaptive",   "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Light",      "GPU", "cuSparse speedup"] = df["CSR Legacy Light",      "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Scalar",     "GPU", "cuSparse speedup"] = df["CSR Legacy Scalar",     "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Stream",     "GPU", "cuSparse speedup"] = df["CSR Legacy Stream",     "GPU", "time"] / df["cuSparse", "GPU", "time"]
#df["CSR Legacy Vector",     "GPU", "cuSparse speedup"] = df["CSR Legacy Vector",     "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["ChunkedEllpack Legacy", "CPU", "CSR speedup"]      = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR",      "CPU", "time"]
df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"]
df["Ellpack Legacy",        "CPU", "CSR speedup"]      = df["Ellpack Legacy",        "CPU", "time"] / df["CSR",      "CPU", "time"]
@@ -87,22 +102,25 @@ df["SlicedEllpack", "CPU", "CSR speedup"] = df["SlicedEllpack",
df["SlicedEllpack",         "GPU", "cuSparse speedup"] = df["SlicedEllpack",         "GPU", "time"] / df["cuSparse", "GPU", "time"]

# Add speedup compared to legacy formats
df["CSR",                   "GPU", "Legacy speedup"]   = df["CSR",                   "GPU", "time"] / df["CSR Legacy",           "GPU", "time"]
df["CSR",                   "CPU", "Legacy speedup"]   = df["CSR",                   "CPU", "time"] / df["CSR Legacy",           "CPU", "time"]
df["CSR",                   "GPU", "Legacy speedup"]   = df["CSR",                   "GPU", "time"] / df["CSR Legacy Scalar",    "GPU", "time"]
df["CSR",                   "CPU", "Legacy speedup"]   = df["CSR",                   "CPU", "time"] / df["CSR Legacy Scalar",    "CPU", "time"]
df["Ellpack",               "GPU", "Legacy speedup"]   = df["Ellpack",               "GPU", "time"] / df["Ellpack Legacy",       "GPU", "time"]
df["Ellpack",               "CPU", "Legacy speedup"]   = df["Ellpack",               "CPU", "time"] / df["Ellpack Legacy",       "CPU", "time"]
df["SlicedEllpack",         "GPU", "Legacy speedup"]   = df["SlicedEllpack",         "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"]
df["SlicedEllpack",         "CPU", "Legacy speedup"]   = df["SlicedEllpack",         "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"]
df["BiEllpack",             "GPU", "Legacy speedup"]   = df["BiEllpack",             "GPU", "time"] / df["BiEllpack Legacy",     "GPU", "time"]
df["BiEllpack",             "CPU", "Legacy speedup"]   = df["BiEllpack",             "CPU", "time"] / df["BiEllpack Legacy",     "CPU", "time"]

print( "Exporting data frame to log.html..." )
pandas.options.display.float_format = '{:,.4f}'.format
df.to_html("log.html")
#print( "Exporting data frame to log.html..." )
#pandas.options.display.float_format = '{:,.4f}'.format
#df.to_html("log.html")

# extract columns of reference formats on GPU
print( "Preparing data for graph analysis..." )
df['cuSparse-bandwidth']=df['cuSparse','GPU','bandwidth']
df['ellpack-bandwidth']=df['Ellpack','GPU','bandwidth']
df['sliced-ellpack-bandwidth']=df['SlicedEllpack','GPU','bandwidth']
df['bi-ellpack-bandwidth']=df['BiEllpack','GPU','bandwidth']

# sort by cuSparse
df.sort_values(by=["cuSparse-bandwidth"],inplace=True,ascending=False)
@@ -125,6 +143,12 @@ df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False)
sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist();
sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist();

# sort by BiEllpack
df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False)
df.sort_values(by=["bi-ellpack-bandwidth"],inplace=True,ascending=False)
bi_ellpack_gpu_list = df["BiEllpack", "GPU", "bandwidth"].tolist();
bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist();

print( "Writing gnuplot files..." )

cuSparse_file = open( "cusparse.gplt", "w" )
@@ -155,7 +179,16 @@ for x in sliced_ellpack_gpu_list:
      if str( sliced_ellpack_legacy_gpu_list[ i ] ) != "nan":
         sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" )
   i = i + 1
ellpack_file.close()
sliced_ellpack_file.close()

bi_ellpack_file = open( "bi-ellpack.gplt", "w" )
i = 0;
for x in bi_ellpack_gpu_list:
   if str( x ) != "nan":
      if str( bi_ellpack_legacy_gpu_list[ i ] ) != "nan":
         bi_ellpack_file.write( f"{i+1} {x} {bi_ellpack_legacy_gpu_list[ i ]}\n" )
   i = i + 1
bi_ellpack_file.close()

print( "Generating Gnuplot file..." )

@@ -179,18 +212,21 @@ gnuplot_file.write( " 'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with
gnuplot_file.write( "     'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" )
gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
#gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" )
gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
#gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" )
gnuplot_file.write( "     'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( "     'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( "     'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "plot 'sliced-ellpack.gplt' using 1:2 title 'SlicedEllpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( "     'sliced-ellpack.gplt' using 1:3 title 'SlicedEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.write( "set output 'bi-ellpack-vs-bi-ellpack-legacy.eps'\n" )
gnuplot_file.write( "plot 'bi-ellpack.gplt' using 1:2 title 'BiEllpack' with lines linewidth 2 lt rgb 'red', \\\n" )
gnuplot_file.write( "     'bi-ellpack.gplt' using 1:3 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
gnuplot_file.close()

print( "Executing Gnuplot ..." )
@@ -203,6 +239,7 @@ os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" )
os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" )
os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" )
os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" )
os.system( "epstopdf --autorotate All bi-ellpack-vs-bi-ellpack-legacy.eps" )

print( "Deleting temprary files..." )
os.system( "rm cusparse.gplt" )