diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 7f47cf2518e37e8d5ecd4f70bb1e04483e05ddbb..883ea084f41defc939ff71cf0fc02580f0d7aeb0 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -20,7 +20,8 @@ #include <TNL/Matrices/SlicedEllpack.h> #include <TNL/Matrices/ChunkedEllpack.h> -//#include <TNL/Matrices/MatrixReader.h> +#include <TNL/Matrices/MatrixReader.h> +using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { @@ -29,16 +30,6 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; -//template< typename Matrix > -//void printMatrixInfo( const String& inputFileName, -// const Matrix& matrix, -// std::ostream& str ) -//{ -// str << " Rows: " << std::setw( 8 ) << matrix.getRows(); -// str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); -// str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); -//} - template< typename Matrix > int setHostTestMatrix( Matrix& matrix, const int elementsPerRow ) @@ -106,106 +97,75 @@ template< typename Real, template< typename, typename, typename > class Vector = Containers::Vector > bool benchmarkSpMV( Benchmark & benchmark, - const int & size, - const int elementsPerRow = 5 ) + const String & inputFileName ) { - typedef Matrix< Real, Devices::Host, int > HostMatrix; - typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; - typedef Containers::Vector< Real, Devices::Host, int > HostVector; - typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector; - - HostMatrix hostMatrix; - DeviceMatrix deviceMatrix; - Containers::Vector< int, Devices::Host, int > hostRowLengths; - Containers::Vector< int, Devices::Cuda, int > deviceRowLengths; - HostVector hostVector, hostVector2; - CudaVector deviceVector, deviceVector2; - - // create benchmark group - const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() ); -#ifdef HAVE_CUDA - benchmark.createHorizontalGroup( parsedType[ 0 ], 2 ); -#else - benchmark.createHorizontalGroup( parsedType[ 0 ], 1 ); -#endif - - hostRowLengths.setSize( size ); - hostMatrix.setDimensions( size, size ); - hostVector.setSize( size ); - hostVector2.setSize( size ); -#ifdef HAVE_CUDA - deviceRowLengths.setSize( size ); - deviceMatrix.setDimensions( size, size ); - deviceVector.setSize( size ); - deviceVector2.setSize( size ); -#endif - - hostRowLengths.setValue( elementsPerRow ); -#ifdef HAVE_CUDA - deviceRowLengths.setValue( elementsPerRow ); -#endif - - hostMatrix.setCompressedRowLengths( hostRowLengths ); -#ifdef HAVE_CUDA - deviceMatrix.setCompressedRowLengths( deviceRowLengths ); -#endif - - const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow ); - setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow ); - const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; - - // reset function - auto reset = [&]() { - hostVector.setValue( 1.0 ); - hostVector2.setValue( 0.0 ); -#ifdef HAVE_CUDA - deviceVector.setValue( 1.0 ); - deviceVector2.setValue( 0.0 ); -#endif - }; - - // compute functions - auto spmvHost = [&]() { - hostMatrix.vectorProduct( hostVector, hostVector2 ); - }; - auto spmvCuda = [&]() { - deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); - }; - - benchmark.setOperation( datasetSize ); - benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); -#ifdef HAVE_CUDA - benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); -#endif - - return true; + typedef Matrix< Real, Devices::Host, int > HostMatrix; + typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; + typedef Containers::Vector< Real, Devices::Host, int > HostVector; + typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector; + + HostMatrix hostMatrix; + DeviceMatrix deviceMatrix; + HostVector hostVector, hostVector2; + CudaVector deviceVector, deviceVector2; + + if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) ) + std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; + else + { + #ifdef HAVE_CUDA + if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) + std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; + #endif + + hostVector.setSize( hostMatrix.getColumns() ); + hostVector2.setSize( hostMatrix.getRows() ); + + #ifdef HAVE_CUDA + deviceVector.setSize( deviceMatrix.getColumns() ); + deviceVector2.setSize( deviceMatrix.getRows() ); + #endif + + // reset function + auto reset = [&]() { + hostVector.setValue( 1.0 ); + hostVector2.setValue( 0.0 ); + #ifdef HAVE_CUDA + deviceVector.setValue( 1.0 ); + deviceVector2.setValue( 0.0 ); + #endif + }; + + const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); + + const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; + + // compute functions + auto spmvHost = [&]() { + hostMatrix.vectorProduct( hostVector, hostVector2 ); + }; + auto spmvCuda = [&]() { + deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); + }; + + benchmark.setOperation( datasetSize ); + benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); + #ifdef HAVE_CUDA + benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); + #endif + return true; + } } template< typename Real = double, typename Index = int > bool benchmarkSpmvSynthetic( Benchmark & benchmark, - const int & size, - const int & elementsPerRow ) + const String& inputFileName ) { -// typedef Matrices::CSR< Real, Devices::Host, int > CSRType; -// CSRType csrMatrix; -// try -// { -// if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) -// { -// std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; -// return false; -// } -// } -// catch( std::bad_alloc ) -// { -// std::cerr << "Not enough memory to read the matrix." << std::endl; -// return false; -// } bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) - result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, size, elementsPerRow ); + result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index b3ad3102e1da327585fcfb2d50562b68df834427..4493dd4cab921dc8bc426b7fb3ee21f359079608 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -21,43 +21,51 @@ #include <Benchmarks/BLAS/vector-operations.h> #include "spmv.h" +#include <TNL/Matrices/MatrixReader.h> +using namespace TNL::Matrices; + using namespace TNL; using namespace TNL::Benchmarks; +//template< typename Matrix > +//void printMatrixInfo( const String& inputFileName, +// const Matrix& matrix, +// std::ostream& str ) +//{ +// str << " Rows: " << std::setw( 8 ) << matrix.getRows(); +// str << " Columns: " << std::setw( 8 ) << matrix.getColumns(); +// str << " Nonzero Elements: " << std::setw( 10 ) << matrix.getNumberOfNonzeroMatrixElements(); +//} template< typename Real > void runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, - const std::size_t & size, - const int & elementsPerRow ) + const String & inputFileName ) { - const String precision = getType< Real >(); - metadata["precision"] = precision; - - // Array operations - benchmark.newBenchmark( String("Array operations (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "size", convertToString( size ) }, } )); - benchmarkArrayOperations< Real >( benchmark, size ); - - // Vector operations - benchmark.newBenchmark( String("Vector operations (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "size", convertToString( size ) }, } )); - benchmarkVectorOperations< Real >( benchmark, size ); - - // Sparse matrix-vector multiplication - benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "rows", convertToString( size ) }, - { "columns", convertToString( size ) }, - { "elements per row", convertToString( elementsPerRow ) }, - } )); - benchmarkSpmvSynthetic< Real >( benchmark, size, elementsPerRow ); + // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) + + typedef Matrices::CSR< Real, Devices::Host, int > CSRType; + CSRType csrMatrix; + + if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) + std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; + else + { + const std::size_t rows = csrMatrix.getRows(); + const std::size_t cols = csrMatrix.getColumns(); + const String precision = getType< Real >(); + metadata["precision"] = precision; + + // Sparse matrix-vector multiplication + benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", + metadata ); + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "rows", convertToString( rows ) }, + { "columns", convertToString( cols ) } + } )); + benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); + } } void @@ -73,9 +81,7 @@ setupConfig( Config::ConfigDescription & config ) config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); - config.addEntry< int >( "size", "Size of arrays/vectors used in the benchmark.", 100000 ); config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 ); - config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addDelimiter( "Device settings:" ); @@ -100,6 +106,7 @@ main( int argc, char* argv[] ) ! Devices::Cuda::setup( parameters ) ) return EXIT_FAILURE; + const String & inputFileName = parameters.getParameter< String >( "input-file" ); const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); @@ -108,9 +115,7 @@ main( int argc, char* argv[] ) // to pass 64-bit integer values // const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); // const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); - const std::size_t size = parameters.getParameter< int >( "size" ); const int loops = parameters.getParameter< int >( "loops" ); - const int elementsPerRow = parameters.getParameter< int >( "elements-per-row" ); const int verbose = parameters.getParameter< int >( "verbose" ); // open log file @@ -124,16 +129,19 @@ main( int argc, char* argv[] ) // prepare global metadata Benchmark::MetadataMap metadata = getHardwareMetadata(); - + + + // DO: Pass the inputFileName parameter and get rows and cols from it to create the cout GUI. if( precision == "all" || precision == "float" ) - runSpMVBenchmarks< float >( benchmark, metadata, size, elementsPerRow ); + runSpMVBenchmarks< float >( benchmark, metadata, inputFileName ); if( precision == "all" || precision == "double" ) - runSpMVBenchmarks< double >( benchmark, metadata, size, elementsPerRow ); + runSpMVBenchmarks< double >( benchmark, metadata, inputFileName ); if( ! benchmark.save( logFile ) ) { std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; return EXIT_FAILURE; } + std::cout << "== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; }