Loading src/Benchmarks/Benchmarks.h +13 −7 Original line number Diff line number Diff line Loading @@ -34,10 +34,11 @@ namespace Benchmarks { const double oneGB = 1024.0 * 1024.0 * 1024.0; template< typename Logger = Logging > struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; using HeaderElements = typename Logger::HeaderElements; using RowElements = typename Logger::RowElements; double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -71,6 +72,11 @@ public: using typename Logger::MetadataColumns; using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >; using typename Logger::CommonLogs; using Logger::addCommonLogs; using Logger::addLogsMetadata; using Logger::writeHeader; Benchmark( int loops = 10, bool verbose = true ) : Logger(verbose), loops(loops) Loading Loading @@ -202,7 +208,7 @@ public: time( ResetFunction reset, const String & performer, ComputeFunction & compute, BenchmarkResult & result ) BenchmarkResult< Logger > & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -247,7 +253,7 @@ public: const String & performer, ComputeFunction & compute ) { BenchmarkResult result; BenchmarkResult< Logger > result; return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result ); } Loading @@ -259,7 +265,7 @@ public: double time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ) BenchmarkResult< Logger > & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -295,7 +301,7 @@ public: time( const String & performer, ComputeFunction & compute ) { BenchmarkResult result; BenchmarkResult< Logger > result; return time< Device, ComputeFunction >( performer, compute, result ); } Loading @@ -310,7 +316,7 @@ public: std::cerr << msg << std::endl; } using Logging::save; using Logger::save; SolverMonitorType& getMonitor() { return monitor; Loading src/Benchmarks/JsonLogging.h +45 −2 Original line number Diff line number Diff line Loading @@ -68,6 +68,7 @@ class JsonLoggingRowElements auto cend() const noexcept { return elements.cend(); } size_t size() const noexcept { return this->elements.size(); }; protected: std::list< String > elements; Loading @@ -81,8 +82,11 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using CommonLogs = std::vector< std::pair< const char*, String > >; using LogsMetadata = std::vector< String >; using HeaderElements = std::vector< String >; using RowElements = LoggingRowElements; using RowElements = JsonLoggingRowElements; JsonLogging( int verbose = true ) : verbose(verbose) Loading @@ -94,6 +98,42 @@ public: this->verbose = verbose; } void addCommonLogs( const CommonLogs& logs ) { for( auto lg : logs ) { if( verbose ) std::cout << lg.first << " = " << lg.second << std::endl; log << "\"" << lg.first << "\" = \"" << lg.second << std::endl; } }; void resetLogsMetadat() { this->logsMetadata.clear(); }; void addLogsMetadata( const std::vector< String >& md ) { this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() ); } void writeHeader() { for( auto md : this->logsMetadata ) std::cout << md << "\t"; std::cout << std::endl; } void writeRow( const RowElements& rowEls ) { TNL_ASSERT_EQ( rowEls.size(), this->logsMetadata.size(), "" ); auto md = this->logsMetadata.begin(); for( auto el : rowEls ) { if( verbose ) std::cout << el << "\t"; log << " \"" << *md++ << "\" = \"" << el << "," << std::endl; } } void writeTitle( const String & title ) { Loading Loading @@ -178,7 +218,7 @@ public: std::cout << std::setw( 20 ) << it.second; } // spanning element is printed as usual column to stdout std::cout << std::setw( 15 ) << spanningElement; //std::cout << std::setw( 15 ) << spanningElement; for( auto & it : subElements ) { std::cout << std::setw( 15 ) << it; } Loading Loading @@ -279,6 +319,9 @@ protected: MetadataColumns metadataColumns; bool header_changed = true; std::vector< std::pair< String, int > > horizontalGroups; // new JSON implementation LogsMetadata logsMetadata; }; } // namespace Benchmarks Loading src/Benchmarks/Logging.h +15 −0 Original line number Diff line number Diff line Loading @@ -81,6 +81,8 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using CommonLogs = std::vector< std::pair< const char*, String > >; using HeaderElements = std::vector< String >; using RowElements = LoggingRowElements; Loading @@ -102,6 +104,19 @@ public: log << ": title = " << title << std::endl; } void addCommonLogs( const CommonLogs& logs ) { for( auto log : logs ) { if( verbose ) std::cout << log.first << " = " << log.second << std::endl; } }; void addLogsMetadata( const std::vector< String >& md ){}; void writeHeader(){}; void writeMetadata( const MetadataMap & metadata ) { Loading src/Benchmarks/SpMV/SpmvBenchmarkResult.h +21 −7 Original line number Diff line number Diff line Loading @@ -17,9 +17,10 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > typename Index, typename Logger = JsonLogging > struct SpmvBenchmarkResult : public BenchmarkResult : public BenchmarkResult< Logger > { using RealType = Real; using DeviceType = Device; Loading @@ -27,23 +28,35 @@ struct SpmvBenchmarkResult using HostVector = Containers::Vector< Real, Devices::Host, Index >; using BenchmarkVector = Containers::Vector< Real, Device, Index >; SpmvBenchmarkResult( const HostVector& csrResult, using typename Logger::HeaderElements; using typename Logger::RowElements; using BenchmarkResult< Logger >::stddev; using BenchmarkResult< Logger >::bandwidth; using BenchmarkResult< Logger >::speedup; SpmvBenchmarkResult( const String& format, const HostVector& csrResult, const BenchmarkVector& benchmarkResult, const IndexType nonzeros ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; : format( format ), csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); return HeaderElements( {"format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); } void setFormat( const String& format ) { this->format = format; }; virtual RowElements getRowElements() const override { HostVector benchmarkResultCopy; benchmarkResultCopy = benchmarkResult; auto diff = csrResult - benchmarkResultCopy; RowElements elements; elements << nonzeros << time << stddev << stddev/time << bandwidth; elements << format << ( std::is_same< Device, Devices::Host >::value ? "CPU" : "GPU" ) << nonzeros << time << stddev << stddev/time << bandwidth; if( speedup != 0.0 ) elements << speedup; else elements << "N/A"; Loading @@ -51,6 +64,7 @@ struct SpmvBenchmarkResult return elements; } String format; const HostVector& csrResult; const BenchmarkVector& benchmarkResult; const IndexType nonzeros; Loading src/Benchmarks/SpMV/spmv.h +40 −26 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <cstdint> #include "../Benchmarks.h" #include "../JsonLogging.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> Loading Loading @@ -58,7 +59,9 @@ using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { namespace SpMVLegacy { namespace SpMV { using BenchmarkType = TNL::Benchmarks::Benchmark< JsonLogging >; ///// // General sparse matrix aliases Loading Loading @@ -218,7 +221,7 @@ std::string getFormatShort( const Matrix& matrix ) } // Print information about the matrix. template< typename Matrix > /*template< typename Matrix > void printMatrixInfo( const Matrix& matrix, std::ostream& str ) { Loading @@ -226,13 +229,13 @@ void printMatrixInfo( const Matrix& matrix, str << " Rows: " << matrix.getRows() << std::endl; str << " Cols: " << matrix.getColumns() << std::endl; str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl; } }*/ template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > void benchmarkSpMVLegacy( Benchmark<>& benchmark, benchmarkSpMVLegacy( BenchmarkType& benchmark, const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector, const String& inputFileName, bool verboseMR ) Loading @@ -247,12 +250,12 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", MatrixInfo< HostMatrix >::getFormat() } } )); } ));*/ const int elements = hostMatrix.getNonzeroElementsCount(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; benchmark.setOperation( datasetSize ); Loading @@ -271,7 +274,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); ///// Loading @@ -289,7 +292,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading @@ -300,7 +303,7 @@ template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > void benchmarkSpMV( Benchmark<>& benchmark, benchmarkSpMV( BenchmarkType& benchmark, const InputMatrix& inputMatrix, const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector, const String& inputFileName, Loading @@ -322,7 +325,7 @@ benchmarkSpMV( Benchmark<>& benchmark, return; } benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, Loading @@ -346,7 +349,7 @@ benchmarkSpMV( Benchmark<>& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); ///// Loading @@ -365,7 +368,7 @@ benchmarkSpMV( Benchmark<>& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading @@ -374,7 +377,7 @@ benchmarkSpMV( Benchmark<>& benchmark, template< typename Real = double, typename Index = int > void benchmarkSpmv( Benchmark<>& benchmark, benchmarkSpmv( BenchmarkType& benchmark, const String& inputFileName, const Config::ParameterContainer& parameters, bool verboseMR ) Loading Loading @@ -417,12 +420,17 @@ benchmarkSpmv( Benchmark<>& benchmark, //// // Perform benchmark on host with CSR as a reference CPU format // benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ benchmark.addCommonLogs( BenchmarkType::CommonLogs( { { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) } } ) ); /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR" ) } } )); } ));*/ HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() ); Loading @@ -435,19 +443,21 @@ benchmarkSpmv( Benchmark<>& benchmark, csrHostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults ); SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader() ); benchmark.writeHeader(); benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults ); #ifdef HAVE_CUDA //// // Perform benchmark on CUDA device with cuSparse as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "cuSparse" ) } } )); } ));*/ cusparseHandle_t cusparseHandle; cusparseCreate( &cusparseHandle ); Loading @@ -469,19 +479,20 @@ benchmarkSpmv( Benchmark<>& benchmark, cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( String( "cusprase" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); #ifdef HAVE_CSR5 //// // Perform benchmark on CUDA device with CSR5 as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "CSR5" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR5" ) } } )); } ));*/ CudaVector cudaOutVector2( cudaOutVector ); CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector ); Loading @@ -489,6 +500,7 @@ benchmarkSpmv( Benchmark<>& benchmark, auto csr5SpMV = [&]() { csr5Benchmark.vectorProduct(); }; benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults ); std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl; csrCudaMatrix.reset(); Loading @@ -497,12 +509,13 @@ benchmarkSpmv( Benchmark<>& benchmark, //// // Perform benchmark on CUDA device with LightSpMV as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "LightSpMV Vector" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "LightSpMV Vector" ) } } )); } ));*/ LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix; lightSpMVCSRHostMatrix = csrHostMatrix; Loading @@ -516,12 +529,13 @@ benchmarkSpmv( Benchmark<>& benchmark, }; benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "LightSpMV Warp" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "LightSpMV Warp" ) } } )); } ));*/ lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp ); benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); #endif Loading Loading
src/Benchmarks/Benchmarks.h +13 −7 Original line number Diff line number Diff line Loading @@ -34,10 +34,11 @@ namespace Benchmarks { const double oneGB = 1024.0 * 1024.0 * 1024.0; template< typename Logger = Logging > struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; using HeaderElements = typename Logger::HeaderElements; using RowElements = typename Logger::RowElements; double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -71,6 +72,11 @@ public: using typename Logger::MetadataColumns; using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >; using typename Logger::CommonLogs; using Logger::addCommonLogs; using Logger::addLogsMetadata; using Logger::writeHeader; Benchmark( int loops = 10, bool verbose = true ) : Logger(verbose), loops(loops) Loading Loading @@ -202,7 +208,7 @@ public: time( ResetFunction reset, const String & performer, ComputeFunction & compute, BenchmarkResult & result ) BenchmarkResult< Logger > & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -247,7 +253,7 @@ public: const String & performer, ComputeFunction & compute ) { BenchmarkResult result; BenchmarkResult< Logger > result; return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result ); } Loading @@ -259,7 +265,7 @@ public: double time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ) BenchmarkResult< Logger > & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -295,7 +301,7 @@ public: time( const String & performer, ComputeFunction & compute ) { BenchmarkResult result; BenchmarkResult< Logger > result; return time< Device, ComputeFunction >( performer, compute, result ); } Loading @@ -310,7 +316,7 @@ public: std::cerr << msg << std::endl; } using Logging::save; using Logger::save; SolverMonitorType& getMonitor() { return monitor; Loading
src/Benchmarks/JsonLogging.h +45 −2 Original line number Diff line number Diff line Loading @@ -68,6 +68,7 @@ class JsonLoggingRowElements auto cend() const noexcept { return elements.cend(); } size_t size() const noexcept { return this->elements.size(); }; protected: std::list< String > elements; Loading @@ -81,8 +82,11 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using CommonLogs = std::vector< std::pair< const char*, String > >; using LogsMetadata = std::vector< String >; using HeaderElements = std::vector< String >; using RowElements = LoggingRowElements; using RowElements = JsonLoggingRowElements; JsonLogging( int verbose = true ) : verbose(verbose) Loading @@ -94,6 +98,42 @@ public: this->verbose = verbose; } void addCommonLogs( const CommonLogs& logs ) { for( auto lg : logs ) { if( verbose ) std::cout << lg.first << " = " << lg.second << std::endl; log << "\"" << lg.first << "\" = \"" << lg.second << std::endl; } }; void resetLogsMetadat() { this->logsMetadata.clear(); }; void addLogsMetadata( const std::vector< String >& md ) { this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() ); } void writeHeader() { for( auto md : this->logsMetadata ) std::cout << md << "\t"; std::cout << std::endl; } void writeRow( const RowElements& rowEls ) { TNL_ASSERT_EQ( rowEls.size(), this->logsMetadata.size(), "" ); auto md = this->logsMetadata.begin(); for( auto el : rowEls ) { if( verbose ) std::cout << el << "\t"; log << " \"" << *md++ << "\" = \"" << el << "," << std::endl; } } void writeTitle( const String & title ) { Loading Loading @@ -178,7 +218,7 @@ public: std::cout << std::setw( 20 ) << it.second; } // spanning element is printed as usual column to stdout std::cout << std::setw( 15 ) << spanningElement; //std::cout << std::setw( 15 ) << spanningElement; for( auto & it : subElements ) { std::cout << std::setw( 15 ) << it; } Loading Loading @@ -279,6 +319,9 @@ protected: MetadataColumns metadataColumns; bool header_changed = true; std::vector< std::pair< String, int > > horizontalGroups; // new JSON implementation LogsMetadata logsMetadata; }; } // namespace Benchmarks Loading
src/Benchmarks/Logging.h +15 −0 Original line number Diff line number Diff line Loading @@ -81,6 +81,8 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using CommonLogs = std::vector< std::pair< const char*, String > >; using HeaderElements = std::vector< String >; using RowElements = LoggingRowElements; Loading @@ -102,6 +104,19 @@ public: log << ": title = " << title << std::endl; } void addCommonLogs( const CommonLogs& logs ) { for( auto log : logs ) { if( verbose ) std::cout << log.first << " = " << log.second << std::endl; } }; void addLogsMetadata( const std::vector< String >& md ){}; void writeHeader(){}; void writeMetadata( const MetadataMap & metadata ) { Loading
src/Benchmarks/SpMV/SpmvBenchmarkResult.h +21 −7 Original line number Diff line number Diff line Loading @@ -17,9 +17,10 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > typename Index, typename Logger = JsonLogging > struct SpmvBenchmarkResult : public BenchmarkResult : public BenchmarkResult< Logger > { using RealType = Real; using DeviceType = Device; Loading @@ -27,23 +28,35 @@ struct SpmvBenchmarkResult using HostVector = Containers::Vector< Real, Devices::Host, Index >; using BenchmarkVector = Containers::Vector< Real, Device, Index >; SpmvBenchmarkResult( const HostVector& csrResult, using typename Logger::HeaderElements; using typename Logger::RowElements; using BenchmarkResult< Logger >::stddev; using BenchmarkResult< Logger >::bandwidth; using BenchmarkResult< Logger >::speedup; SpmvBenchmarkResult( const String& format, const HostVector& csrResult, const BenchmarkVector& benchmarkResult, const IndexType nonzeros ) : csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; : format( format ), csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){}; virtual HeaderElements getTableHeader() const override { return HeaderElements( {"non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); return HeaderElements( {"format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); } void setFormat( const String& format ) { this->format = format; }; virtual RowElements getRowElements() const override { HostVector benchmarkResultCopy; benchmarkResultCopy = benchmarkResult; auto diff = csrResult - benchmarkResultCopy; RowElements elements; elements << nonzeros << time << stddev << stddev/time << bandwidth; elements << format << ( std::is_same< Device, Devices::Host >::value ? "CPU" : "GPU" ) << nonzeros << time << stddev << stddev/time << bandwidth; if( speedup != 0.0 ) elements << speedup; else elements << "N/A"; Loading @@ -51,6 +64,7 @@ struct SpmvBenchmarkResult return elements; } String format; const HostVector& csrResult; const BenchmarkVector& benchmarkResult; const IndexType nonzeros; Loading
src/Benchmarks/SpMV/spmv.h +40 −26 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <cstdint> #include "../Benchmarks.h" #include "../JsonLogging.h" #include "SpmvBenchmarkResult.h" #include <TNL/Pointers/DevicePointer.h> Loading Loading @@ -58,7 +59,9 @@ using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { namespace SpMVLegacy { namespace SpMV { using BenchmarkType = TNL::Benchmarks::Benchmark< JsonLogging >; ///// // General sparse matrix aliases Loading Loading @@ -218,7 +221,7 @@ std::string getFormatShort( const Matrix& matrix ) } // Print information about the matrix. template< typename Matrix > /*template< typename Matrix > void printMatrixInfo( const Matrix& matrix, std::ostream& str ) { Loading @@ -226,13 +229,13 @@ void printMatrixInfo( const Matrix& matrix, str << " Rows: " << matrix.getRows() << std::endl; str << " Cols: " << matrix.getColumns() << std::endl; str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl; } }*/ template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > void benchmarkSpMVLegacy( Benchmark<>& benchmark, benchmarkSpMVLegacy( BenchmarkType& benchmark, const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector, const String& inputFileName, bool verboseMR ) Loading @@ -247,12 +250,12 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, SpMV::ReferenceFormats::Legacy::LegacyMatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", MatrixInfo< HostMatrix >::getFormat() } } )); } ));*/ const int elements = hostMatrix.getNonzeroElementsCount(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; benchmark.setOperation( datasetSize ); Loading @@ -271,7 +274,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); ///// Loading @@ -289,7 +292,7 @@ benchmarkSpMVLegacy( Benchmark<>& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading @@ -300,7 +303,7 @@ template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > void benchmarkSpMV( Benchmark<>& benchmark, benchmarkSpMV( BenchmarkType& benchmark, const InputMatrix& inputMatrix, const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector, const String& inputFileName, Loading @@ -322,7 +325,7 @@ benchmarkSpMV( Benchmark<>& benchmark, return; } benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, Loading @@ -346,7 +349,7 @@ benchmarkSpMV( Benchmark<>& benchmark, hostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); ///// Loading @@ -365,7 +368,7 @@ benchmarkSpMV( Benchmark<>& benchmark, auto spmvCuda = [&]() { cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif std::cout << std::endl; Loading @@ -374,7 +377,7 @@ benchmarkSpMV( Benchmark<>& benchmark, template< typename Real = double, typename Index = int > void benchmarkSpmv( Benchmark<>& benchmark, benchmarkSpmv( BenchmarkType& benchmark, const String& inputFileName, const Config::ParameterContainer& parameters, bool verboseMR ) Loading Loading @@ -417,12 +420,17 @@ benchmarkSpmv( Benchmark<>& benchmark, //// // Perform benchmark on host with CSR as a reference CPU format // benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({ benchmark.addCommonLogs( BenchmarkType::CommonLogs( { { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) } } ) ); /*benchmark.setMetadataColumns( BenchmarkType::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR" ) } } )); } ));*/ HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() ); Loading @@ -435,19 +443,21 @@ benchmarkSpmv( Benchmark<>& benchmark, csrHostMatrix.vectorProduct( hostInVector, hostOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults ); SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader() ); benchmark.writeHeader(); benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults ); #ifdef HAVE_CUDA //// // Perform benchmark on CUDA device with cuSparse as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "cuSparse" ) } } )); } ));*/ cusparseHandle_t cusparseHandle; cusparseCreate( &cusparseHandle ); Loading @@ -469,19 +479,20 @@ benchmarkSpmv( Benchmark<>& benchmark, cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector ); }; SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( String( "cusprase" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults ); #ifdef HAVE_CSR5 //// // Perform benchmark on CUDA device with CSR5 as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "CSR5" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR5" ) } } )); } ));*/ CudaVector cudaOutVector2( cudaOutVector ); CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector ); Loading @@ -489,6 +500,7 @@ benchmarkSpmv( Benchmark<>& benchmark, auto csr5SpMV = [&]() { csr5Benchmark.vectorProduct(); }; benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults ); std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl; csrCudaMatrix.reset(); Loading @@ -497,12 +509,13 @@ benchmarkSpmv( Benchmark<>& benchmark, //// // Perform benchmark on CUDA device with LightSpMV as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "LightSpMV Vector" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "LightSpMV Vector" ) } } )); } ));*/ LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix; lightSpMVCSRHostMatrix = csrHostMatrix; Loading @@ -516,12 +529,13 @@ benchmarkSpmv( Benchmark<>& benchmark, }; benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ cudaBenchmarkResults.setFormat( String( "LightSpMV Warp" ) ); /*benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "LightSpMV Warp" ) } } )); } ));*/ lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp ); benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults ); #endif Loading