Loading src/Benchmarks/SpMV/spmv.h +75 −105 Original line number Diff line number Diff line Loading @@ -31,65 +31,20 @@ template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; template< typename Matrix > int setHostTestMatrix( Matrix& matrix, const int elementsPerRow ) void printMatrixInfo( const String& inputFileName, const Matrix& matrix, std::ostream& str ) { const int size = matrix.getRows(); int elements( 0 ); for( int row = 0; row < size; row++ ) { int col = row - elementsPerRow / 2; for( int element = 0; element < elementsPerRow; element++ ) { if( col + element >= 0 && col + element < size ) { matrix.setElement( row, col + element, element + 1 ); elements++; } } } return elements; } #ifdef HAVE_CUDA template< typename Matrix > __global__ void setCudaTestMatrixKernel( Matrix* matrix, const int elementsPerRow, const int gridIdx ) { const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= matrix->getRows() ) return; int col = rowIdx - elementsPerRow / 2; for( int element = 0; element < elementsPerRow; element++ ) { if( col + element >= 0 && col + element < matrix->getColumns() ) matrix->setElementFast( rowIdx, col + element, element + 1 ); // Get only the name of the format from getType(). std::string mtrxFullType = matrix.getType(); std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<")); std::string type = mtrxType.substr(mtrxType.find(':') + 2); str << "\n Format: " << type << std::endl; str << " Rows: " << matrix.getRows() << std::endl; str << " Cols: " << matrix.getColumns() << std::endl; str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl; } } #endif template< typename Matrix > void setCudaTestMatrix( Matrix& matrix, const int elementsPerRow ) { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; typedef typename Matrix::RealType RealType; Pointers::DevicePointer< Matrix > kernel_matrix( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); setCudaTestMatrixKernel< Matrix > <<< cudaGridSize, cudaBlockSize >>> ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx ); TNL_CHECK_CUDA_DEVICE; } #endif } // TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h template< typename Real, Loading @@ -109,21 +64,37 @@ benchmarkSpMV( Benchmark & benchmark, HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; try { if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) ) { std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else return false; } } catch( std::bad_alloc ) { std::cerr << "Not enough memory to read the matrix." << std::endl; return false; } printMatrixInfo( inputFileName, hostMatrix, std::cout ); #ifdef HAVE_CUDA if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; // FIXME: This doesn't work for ChunkedEllpack, because // its cross-device assignment is not implemented yet. deviceMatrix = hostMatrix; #endif benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) } } )); hostVector.setSize( hostMatrix.getColumns() ); hostVector2.setSize( hostMatrix.getRows() ); #ifdef HAVE_CUDA deviceVector.setSize( deviceMatrix.getColumns() ); deviceVector2.setSize( deviceMatrix.getRows() ); deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); #endif // reset function Loading Loading @@ -155,7 +126,6 @@ benchmarkSpMV( Benchmark & benchmark, #endif return true; } } template< typename Real = double, typename Index = int > Loading @@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName ); return result; } Loading src/Benchmarks/SpMV/tnl-benchmark-spmv.h +10 −31 Original line number Diff line number Diff line Loading @@ -43,41 +43,25 @@ runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, const String & inputFileName ) { // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) typedef Matrices::CSR< Real, Devices::Host, int > CSRType; CSRType csrMatrix; if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { const std::size_t rows = csrMatrix.getRows(); const std::size_t cols = csrMatrix.getColumns(); const String precision = getType< Real >(); metadata["precision"] = precision; // Sparse matrix-vector multiplication benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( rows ) }, { "columns", convertToString( cols ) } } )); benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); } } void setupConfig( Config::ConfigDescription & config ) { config.addDelimiter( "Benchmark settings:" ); config.addRequiredEntry< String >( "input-file", "Input file name." ); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log"); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log"); config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); config.addEntryEnum( "append" ); config.addEntryEnum( "overwrite" ); config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" ); config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" ); config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); Loading Loading @@ -110,11 +94,6 @@ main( int argc, char* argv[] ) const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(), // which have a default value. The workaround below works for int values, but it is not possible // to pass 64-bit integer values // const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); // const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); const int loops = parameters.getParameter< int >( "loops" ); const int verbose = parameters.getParameter< int >( "verbose" ); Loading Loading @@ -142,6 +121,6 @@ main( int argc, char* argv[] ) return EXIT_FAILURE; } std::cout << "== BENCHMARK FINISHED ==" << std::endl; std::cout << "\n== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; } Loading
src/Benchmarks/SpMV/spmv.h +75 −105 Original line number Diff line number Diff line Loading @@ -31,65 +31,20 @@ template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; template< typename Matrix > int setHostTestMatrix( Matrix& matrix, const int elementsPerRow ) void printMatrixInfo( const String& inputFileName, const Matrix& matrix, std::ostream& str ) { const int size = matrix.getRows(); int elements( 0 ); for( int row = 0; row < size; row++ ) { int col = row - elementsPerRow / 2; for( int element = 0; element < elementsPerRow; element++ ) { if( col + element >= 0 && col + element < size ) { matrix.setElement( row, col + element, element + 1 ); elements++; } } } return elements; } #ifdef HAVE_CUDA template< typename Matrix > __global__ void setCudaTestMatrixKernel( Matrix* matrix, const int elementsPerRow, const int gridIdx ) { const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= matrix->getRows() ) return; int col = rowIdx - elementsPerRow / 2; for( int element = 0; element < elementsPerRow; element++ ) { if( col + element >= 0 && col + element < matrix->getColumns() ) matrix->setElementFast( rowIdx, col + element, element + 1 ); // Get only the name of the format from getType(). std::string mtrxFullType = matrix.getType(); std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<")); std::string type = mtrxType.substr(mtrxType.find(':') + 2); str << "\n Format: " << type << std::endl; str << " Rows: " << matrix.getRows() << std::endl; str << " Cols: " << matrix.getColumns() << std::endl; str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl; } } #endif template< typename Matrix > void setCudaTestMatrix( Matrix& matrix, const int elementsPerRow ) { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; typedef typename Matrix::RealType RealType; Pointers::DevicePointer< Matrix > kernel_matrix( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); setCudaTestMatrixKernel< Matrix > <<< cudaGridSize, cudaBlockSize >>> ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx ); TNL_CHECK_CUDA_DEVICE; } #endif } // TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h template< typename Real, Loading @@ -109,21 +64,37 @@ benchmarkSpMV( Benchmark & benchmark, HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; try { if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) ) { std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else return false; } } catch( std::bad_alloc ) { std::cerr << "Not enough memory to read the matrix." << std::endl; return false; } printMatrixInfo( inputFileName, hostMatrix, std::cout ); #ifdef HAVE_CUDA if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; // FIXME: This doesn't work for ChunkedEllpack, because // its cross-device assignment is not implemented yet. deviceMatrix = hostMatrix; #endif benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) } } )); hostVector.setSize( hostMatrix.getColumns() ); hostVector2.setSize( hostMatrix.getRows() ); #ifdef HAVE_CUDA deviceVector.setSize( deviceMatrix.getColumns() ); deviceVector2.setSize( deviceMatrix.getRows() ); deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); #endif // reset function Loading Loading @@ -155,7 +126,6 @@ benchmarkSpMV( Benchmark & benchmark, #endif return true; } } template< typename Real = double, typename Index = int > Loading @@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName ); return result; } Loading
src/Benchmarks/SpMV/tnl-benchmark-spmv.h +10 −31 Original line number Diff line number Diff line Loading @@ -43,41 +43,25 @@ runSpMVBenchmarks( Benchmark & benchmark, Benchmark::MetadataMap metadata, const String & inputFileName ) { // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) typedef Matrices::CSR< Real, Devices::Host, int > CSRType; CSRType csrMatrix; if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; else { const std::size_t rows = csrMatrix.getRows(); const std::size_t cols = csrMatrix.getColumns(); const String precision = getType< Real >(); metadata["precision"] = precision; // Sparse matrix-vector multiplication benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", metadata ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "rows", convertToString( rows ) }, { "columns", convertToString( cols ) } } )); benchmarkSpmvSynthetic< Real >( benchmark, inputFileName ); } } void setupConfig( Config::ConfigDescription & config ) { config.addDelimiter( "Benchmark settings:" ); config.addRequiredEntry< String >( "input-file", "Input file name." ); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log"); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log"); config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); config.addEntryEnum( "append" ); config.addEntryEnum( "overwrite" ); config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" ); config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" ); config.addEntryEnum( "float" ); config.addEntryEnum( "double" ); config.addEntryEnum( "all" ); Loading Loading @@ -110,11 +94,6 @@ main( int argc, char* argv[] ) const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & precision = parameters.getParameter< String >( "precision" ); // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(), // which have a default value. The workaround below works for int values, but it is not possible // to pass 64-bit integer values // const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" ); // const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" ); const int loops = parameters.getParameter< int >( "loops" ); const int verbose = parameters.getParameter< int >( "verbose" ); Loading Loading @@ -142,6 +121,6 @@ main( int argc, char* argv[] ) return EXIT_FAILURE; } std::cout << "== BENCHMARK FINISHED ==" << std::endl; std::cout << "\n== BENCHMARK FINISHED ==" << std::endl; return EXIT_SUCCESS; }