diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 883ea084f41defc939ff71cf0fc02580f0d7aeb0..7d06905a2d471ec5dee4f32d47bb064345a5bf18 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; template< typename Matrix > -int setHostTestMatrix( Matrix& matrix, - const int elementsPerRow ) +void printMatrixInfo( const String& inputFileName, + const Matrix& matrix, + std::ostream& str ) { - const int size = matrix.getRows(); - int elements( 0 ); - for( int row = 0; row < size; row++ ) { - int col = row - elementsPerRow / 2; - for( int element = 0; element < elementsPerRow; element++ ) { - if( col + element >= 0 && - col + element < size ) - { - matrix.setElement( row, col + element, element + 1 ); - elements++; - } - } - } - return elements; -} - -#ifdef HAVE_CUDA -template< typename Matrix > -__global__ void setCudaTestMatrixKernel( Matrix* matrix, - const int elementsPerRow, - const int gridIdx ) -{ - const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx >= matrix->getRows() ) - return; - int col = rowIdx - elementsPerRow / 2; - for( int element = 0; element < elementsPerRow; element++ ) { - if( col + element >= 0 && - col + element < matrix->getColumns() ) - matrix->setElementFast( rowIdx, col + element, element + 1 ); - } -} -#endif - -template< typename Matrix > -void setCudaTestMatrix( Matrix& matrix, - const int elementsPerRow ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - typedef typename Matrix::RealType RealType; - Pointers::DevicePointer< Matrix > kernel_matrix( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); - setCudaTestMatrixKernel< Matrix > - <<< cudaGridSize, cudaBlockSize >>> - ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx ); - TNL_CHECK_CUDA_DEVICE; - } -#endif + // Get only the name of the format from getType(). + std::string mtrxFullType = matrix.getType(); + std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<")); + std::string type = mtrxType.substr(mtrxType.find(':') + 2); + + str << "\n Format: " << type << std::endl; + str << " Rows: " << matrix.getRows() << std::endl; + str << " Cols: " << matrix.getColumns() << std::endl; + str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl; } - // TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h template< typename Real, template< typename, typename, typename > class Matrix, @@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark, HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; - if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) ) - std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; - else - { - #ifdef HAVE_CUDA - if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) + try + { + if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) ) + { std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; - #endif - - hostVector.setSize( hostMatrix.getColumns() ); - hostVector2.setSize( hostMatrix.getRows() ); - - #ifdef HAVE_CUDA - deviceVector.setSize( deviceMatrix.getColumns() ); - deviceVector2.setSize( deviceMatrix.getRows() ); - #endif - - // reset function - auto reset = [&]() { - hostVector.setValue( 1.0 ); - hostVector2.setValue( 0.0 ); - #ifdef HAVE_CUDA - deviceVector.setValue( 1.0 ); - deviceVector2.setValue( 0.0 ); - #endif - }; - - const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); - - const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; - - // compute functions - auto spmvHost = [&]() { - hostMatrix.vectorProduct( hostVector, hostVector2 ); - }; - auto spmvCuda = [&]() { - deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); - }; - - benchmark.setOperation( datasetSize ); - benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); - #ifdef HAVE_CUDA - benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); - #endif - return true; - } + return false; + } + } + catch( std::bad_alloc ) + { + std::cerr << "Not enough memory to read the matrix." << std::endl; + return false; + } + printMatrixInfo( inputFileName, hostMatrix, std::cout ); +#ifdef HAVE_CUDA + // FIXME: This doesn't work for ChunkedEllpack, because + // its cross-device assignment is not implemented yet. + deviceMatrix = hostMatrix; +#endif + + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "rows", convertToString( hostMatrix.getRows() ) }, + { "columns", convertToString( hostMatrix.getColumns() ) } + } )); + + hostVector.setSize( hostMatrix.getColumns() ); + hostVector2.setSize( hostMatrix.getRows() ); + +#ifdef HAVE_CUDA + deviceVector.setSize( hostMatrix.getColumns() ); + deviceVector2.setSize( hostMatrix.getRows() ); +#endif + + // reset function + auto reset = [&]() { + hostVector.setValue( 1.0 ); + hostVector2.setValue( 0.0 ); + #ifdef HAVE_CUDA + deviceVector.setValue( 1.0 ); + deviceVector2.setValue( 0.0 ); + #endif + }; + + const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); + + const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; + + // compute functions + auto spmvHost = [&]() { + hostMatrix.vectorProduct( hostVector, hostVector2 ); + }; + auto spmvCuda = [&]() { + deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); + }; + + benchmark.setOperation( datasetSize ); + benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); + #ifdef HAVE_CUDA + benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); + #endif + return true; } template< typename Real = double, @@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); -// result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); -// result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); -// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); + result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName ); + result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName ); +// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName ); return result; } diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index 4493dd4cab921dc8bc426b7fb3ee21f359079608..6a13aefce360910abab68ceb35c4118f0da8f8ad 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, const String & inputFileName ) { - // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) - - typedef Matrices::CSR< Real, Devices::Host, int > CSRType; - CSRType csrMatrix; - - if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) - std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; - else - { - const std::size_t rows = csrMatrix.getRows(); - const std::size_t cols = csrMatrix.getColumns(); - const String precision = getType< Real >(); - metadata["precision"] = precision; - - // Sparse matrix-vector multiplication - benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "rows", convertToString( rows ) }, - { "columns", convertToString( cols ) } - } )); - benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); - } + const String precision = getType< Real >(); + metadata["precision"] = precision; + + // Sparse matrix-vector multiplication + benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", + metadata ); + benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); } void @@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config ) { config.addDelimiter( "Benchmark settings:" ); config.addRequiredEntry< String >( "input-file", "Input file name." ); - config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log"); + config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log"); config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); config.addEntryEnum( "append" ); config.addEntryEnum( "overwrite" ); - config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" ); + config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" ); config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); @@ -110,11 +94,6 @@ main( int argc, char* argv[] ) const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); - // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(), - // which have a default value. The workaround below works for int values, but it is not possible - // to pass 64-bit integer values -// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); -// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); const int loops = parameters.getParameter< int >( "loops" ); const int verbose = parameters.getParameter< int >( "verbose" ); @@ -142,6 +121,6 @@ main( int argc, char* argv[] ) return EXIT_FAILURE; } - std::cout << "== BENCHMARK FINISHED ==" << std::endl; + std::cout << "\n== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; }