Loading src/Benchmarks/SpMV/spmv.h +61 −101 Original line number Diff line number Diff line Loading @@ -20,7 +20,8 @@ #include <TNL/Matrices/SlicedEllpack.h> #include <TNL/Matrices/ChunkedEllpack.h> //#include <TNL/Matrices/MatrixReader.h> #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { Loading @@ -29,16 +30,6 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; //template< typename Matrix > //void printMatrixInfo( const String& inputFileName, // const Matrix& matrix, // std::ostream& str ) //{ // str << " Rows: " << std::setw( 8 ) << matrix.getRows(); // str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); // str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); //} template< typename Matrix > int setHostTestMatrix( Matrix& matrix, const int elementsPerRow ) Loading Loading @@ -106,8 +97,7 @@ template< typename Real, template< typename, typename, typename > class Vector = Containers::Vector > bool benchmarkSpMV( Benchmark & benchmark, const int & size, const int elementsPerRow = 5 ) const String & inputFileName ) { typedef Matrix< Real, Devices::Host, int > HostMatrix; typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; Loading @@ -116,44 +106,26 @@ benchmarkSpMV( Benchmark & benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; Containers::Vector< int, Devices::Host, int > hostRowLengths; Containers::Vector< int, Devices::Cuda, int > deviceRowLengths; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; // create benchmark group const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() ); #ifdef HAVE_CUDA benchmark.createHorizontalGroup( parsedType[ 0 ], 2 ); #else benchmark.createHorizontalGroup( parsedType[ 0 ], 1 ); #endif hostRowLengths.setSize( size ); hostMatrix.setDimensions( size, size ); hostVector.setSize( size ); hostVector2.setSize( size ); if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { #ifdef HAVE_CUDA deviceRowLengths.setSize( size ); deviceMatrix.setDimensions( size, size ); deviceVector.setSize( size ); deviceVector2.setSize( size ); if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; #endif hostRowLengths.setValue( elementsPerRow ); #ifdef HAVE_CUDA deviceRowLengths.setValue( elementsPerRow ); #endif hostVector.setSize( hostMatrix.getColumns() ); hostVector2.setSize( hostMatrix.getRows() ); hostMatrix.setCompressedRowLengths( hostRowLengths ); #ifdef HAVE_CUDA deviceMatrix.setCompressedRowLengths( deviceRowLengths ); deviceVector.setSize( deviceMatrix.getColumns() ); deviceVector2.setSize( deviceMatrix.getRows() ); #endif const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow ); setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow ); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); Loading @@ -164,6 +136,10 @@ benchmarkSpMV( Benchmark & benchmark, #endif }; const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // compute functions auto spmvHost = [&]() { hostMatrix.vectorProduct( hostVector, hostVector2 ); Loading @@ -177,35 +153,19 @@ benchmarkSpMV( Benchmark & benchmark, #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); #endif return true; } } template< typename Real = double, typename Index = int > bool benchmarkSpmvSynthetic( Benchmark & benchmark, const int & size, const int & elementsPerRow ) const String& inputFileName ) { // typedef Matrices::CSR< Real, Devices::Host, int > CSRType; // CSRType csrMatrix; // try // { // if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) // { // std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; // return false; // } // } // catch( std::bad_alloc ) // { // std::cerr << "Not enough memory to read the matrix." << std::endl; // return false; // } bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); Loading src/Benchmarks/SpMV/tnl-benchmark-spmv.h +43 −35 Original line number Diff line number Diff line Loading @@ -21,43 +21,51 @@ #include <Benchmarks/BLAS/vector-operations.h> #include "spmv.h" #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; using namespace TNL; using namespace TNL::Benchmarks; //template< typename Matrix > //void printMatrixInfo( const String& inputFileName, // const Matrix& matrix, // std::ostream& str ) //{ // str << " Rows: " << std::setw( 8 ) << matrix.getRows(); // str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); // str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); //} template< typename Real > void runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, const std::size_t & size, const int & elementsPerRow ) const String & inputFileName ) { const String precision = getType< Real >(); metadata["precision"] = precision; // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) // Array operations benchmark.newBenchmark( String("Array operations (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "size", convertToString( size ) }, } )); benchmarkArrayOperations< Real >( benchmark, size ); typedef Matrices::CSR< Real, Devices::Host, int > CSRType; CSRType csrMatrix; // Vector operations benchmark.newBenchmark( String("Vector operations (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "size", convertToString( size ) }, } )); benchmarkVectorOperations< Real >( benchmark, size ); if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { const std::size_t rows = csrMatrix.getRows(); const std::size_t cols = csrMatrix.getColumns(); const String precision = getType< Real >(); metadata["precision"] = precision; // Sparse matrix-vector multiplication benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( size ) }, { "columns", convertToString( size ) }, { "elements per row", convertToString( elementsPerRow ) }, { "rows", convertToString( rows ) }, { "columns", convertToString( cols ) } } )); benchmarkSpmvSynthetic< Real >( benchmark, size, elementsPerRow ); benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); } } void Loading @@ -73,9 +81,7 @@ setupConfig( Config::ConfigDescription & config ) config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); config.addEntry< int >( "size", "Size of arrays/vectors used in the benchmark.", 100000 ); config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 ); config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addDelimiter( "Device settings:" ); Loading @@ -100,6 +106,7 @@ main( int argc, char* argv[] ) ! Devices::Cuda::setup( parameters ) ) return EXIT_FAILURE; const String & inputFileName = parameters.getParameter< String >( "input-file" ); const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); Loading @@ -108,9 +115,7 @@ main( int argc, char* argv[] ) // to pass 64-bit integer values // const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); // const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); const std::size_t size = parameters.getParameter< int >( "size" ); const int loops = parameters.getParameter< int >( "loops" ); const int elementsPerRow = parameters.getParameter< int >( "elements-per-row" ); const int verbose = parameters.getParameter< int >( "verbose" ); // open log file Loading @@ -125,15 +130,18 @@ main( int argc, char* argv[] ) // prepare global metadata Benchmark::MetadataMap metadata = getHardwareMetadata(); // DO: Pass the inputFileName parameter and get rows and cols from it to create the cout GUI. if( precision == "all" || precision == "float" ) runSpMVBenchmarks< float >( benchmark, metadata, size, elementsPerRow ); runSpMVBenchmarks< float >( benchmark, metadata, inputFileName ); if( precision == "all" || precision == "double" ) runSpMVBenchmarks< double >( benchmark, metadata, size, elementsPerRow ); runSpMVBenchmarks< double >( benchmark, metadata, inputFileName ); if( ! benchmark.save( logFile ) ) { std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; return EXIT_FAILURE; } std::cout << "== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; } Loading
src/Benchmarks/SpMV/spmv.h +61 −101 Original line number Diff line number Diff line Loading @@ -20,7 +20,8 @@ #include <TNL/Matrices/SlicedEllpack.h> #include <TNL/Matrices/ChunkedEllpack.h> //#include <TNL/Matrices/MatrixReader.h> #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { Loading @@ -29,16 +30,6 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; //template< typename Matrix > //void printMatrixInfo( const String& inputFileName, // const Matrix& matrix, // std::ostream& str ) //{ // str << " Rows: " << std::setw( 8 ) << matrix.getRows(); // str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); // str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); //} template< typename Matrix > int setHostTestMatrix( Matrix& matrix, const int elementsPerRow ) Loading Loading @@ -106,8 +97,7 @@ template< typename Real, template< typename, typename, typename > class Vector = Containers::Vector > bool benchmarkSpMV( Benchmark & benchmark, const int & size, const int elementsPerRow = 5 ) const String & inputFileName ) { typedef Matrix< Real, Devices::Host, int > HostMatrix; typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; Loading @@ -116,44 +106,26 @@ benchmarkSpMV( Benchmark & benchmark, HostMatrix hostMatrix; DeviceMatrix deviceMatrix; Containers::Vector< int, Devices::Host, int > hostRowLengths; Containers::Vector< int, Devices::Cuda, int > deviceRowLengths; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; // create benchmark group const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() ); #ifdef HAVE_CUDA benchmark.createHorizontalGroup( parsedType[ 0 ], 2 ); #else benchmark.createHorizontalGroup( parsedType[ 0 ], 1 ); #endif hostRowLengths.setSize( size ); hostMatrix.setDimensions( size, size ); hostVector.setSize( size ); hostVector2.setSize( size ); if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { #ifdef HAVE_CUDA deviceRowLengths.setSize( size ); deviceMatrix.setDimensions( size, size ); deviceVector.setSize( size ); deviceVector2.setSize( size ); if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; #endif hostRowLengths.setValue( elementsPerRow ); #ifdef HAVE_CUDA deviceRowLengths.setValue( elementsPerRow ); #endif hostVector.setSize( hostMatrix.getColumns() ); hostVector2.setSize( hostMatrix.getRows() ); hostMatrix.setCompressedRowLengths( hostRowLengths ); #ifdef HAVE_CUDA deviceMatrix.setCompressedRowLengths( deviceRowLengths ); deviceVector.setSize( deviceMatrix.getColumns() ); deviceVector2.setSize( deviceMatrix.getRows() ); #endif const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow ); setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow ); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // reset function auto reset = [&]() { hostVector.setValue( 1.0 ); Loading @@ -164,6 +136,10 @@ benchmarkSpMV( Benchmark & benchmark, #endif }; const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; // compute functions auto spmvHost = [&]() { hostMatrix.vectorProduct( hostVector, hostVector2 ); Loading @@ -177,35 +153,19 @@ benchmarkSpMV( Benchmark & benchmark, #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); #endif return true; } } template< typename Real = double, typename Index = int > bool benchmarkSpmvSynthetic( Benchmark & benchmark, const int & size, const int & elementsPerRow ) const String& inputFileName ) { // typedef Matrices::CSR< Real, Devices::Host, int > CSRType; // CSRType csrMatrix; // try // { // if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) // { // std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; // return false; // } // } // catch( std::bad_alloc ) // { // std::cerr << "Not enough memory to read the matrix." << std::endl; // return false; // } bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); Loading
src/Benchmarks/SpMV/tnl-benchmark-spmv.h +43 −35 Original line number Diff line number Diff line Loading @@ -21,43 +21,51 @@ #include <Benchmarks/BLAS/vector-operations.h> #include "spmv.h" #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; using namespace TNL; using namespace TNL::Benchmarks; //template< typename Matrix > //void printMatrixInfo( const String& inputFileName, // const Matrix& matrix, // std::ostream& str ) //{ // str << " Rows: " << std::setw( 8 ) << matrix.getRows(); // str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); // str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); //} template< typename Real > void runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, const std::size_t & size, const int & elementsPerRow ) const String & inputFileName ) { const String precision = getType< Real >(); metadata["precision"] = precision; // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) // Array operations benchmark.newBenchmark( String("Array operations (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "size", convertToString( size ) }, } )); benchmarkArrayOperations< Real >( benchmark, size ); typedef Matrices::CSR< Real, Devices::Host, int > CSRType; CSRType csrMatrix; // Vector operations benchmark.newBenchmark( String("Vector operations (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "size", convertToString( size ) }, } )); benchmarkVectorOperations< Real >( benchmark, size ); if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { const std::size_t rows = csrMatrix.getRows(); const std::size_t cols = csrMatrix.getColumns(); const String precision = getType< Real >(); metadata["precision"] = precision; // Sparse matrix-vector multiplication benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( size ) }, { "columns", convertToString( size ) }, { "elements per row", convertToString( elementsPerRow ) }, { "rows", convertToString( rows ) }, { "columns", convertToString( cols ) } } )); benchmarkSpmvSynthetic< Real >( benchmark, size, elementsPerRow ); benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); } } void Loading @@ -73,9 +81,7 @@ setupConfig( Config::ConfigDescription & config ) config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); config.addEntry< int >( "size", "Size of arrays/vectors used in the benchmark.", 100000 ); config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 ); config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addDelimiter( "Device settings:" ); Loading @@ -100,6 +106,7 @@ main( int argc, char* argv[] ) ! Devices::Cuda::setup( parameters ) ) return EXIT_FAILURE; const String & inputFileName = parameters.getParameter< String >( "input-file" ); const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); Loading @@ -108,9 +115,7 @@ main( int argc, char* argv[] ) // to pass 64-bit integer values // const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); // const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); const std::size_t size = parameters.getParameter< int >( "size" ); const int loops = parameters.getParameter< int >( "loops" ); const int elementsPerRow = parameters.getParameter< int >( "elements-per-row" ); const int verbose = parameters.getParameter< int >( "verbose" ); // open log file Loading @@ -125,15 +130,18 @@ main( int argc, char* argv[] ) // prepare global metadata Benchmark::MetadataMap metadata = getHardwareMetadata(); // DO: Pass the inputFileName parameter and get rows and cols from it to create the cout GUI. if( precision == "all" || precision == "float" ) runSpMVBenchmarks< float >( benchmark, metadata, size, elementsPerRow ); runSpMVBenchmarks< float >( benchmark, metadata, inputFileName ); if( precision == "all" || precision == "double" ) runSpMVBenchmarks< double >( benchmark, metadata, size, elementsPerRow ); runSpMVBenchmarks< double >( benchmark, metadata, inputFileName ); if( ! benchmark.save( logFile ) ) { std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; return EXIT_FAILURE; } std::cout << "== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; }