diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index d8b20898310a1353cad1bced491410be620abc08..37b9bf7bbff15a28937af2c3b609b8ecb5a40bfe 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -40,11 +40,11 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; std::string getMatrixFileName( const String& InputFileName ) { std::string fileName = InputFileName; - + const size_t last_slash_idx = fileName.find_last_of( "/\\" ); if( std::string::npos != last_slash_idx ) fileName.erase( 0, last_slash_idx + 1 ); - + return fileName; } @@ -55,7 +55,7 @@ std::string getMatrixFormat( const Matrix& matrix ) std::string mtrxFullType = getType( matrix ); std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) ); std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 ); - + return format; } @@ -63,7 +63,7 @@ std::string getMatrixFormat( const Matrix& matrix ) template< typename Matrix > void printMatrixInfo( const Matrix& matrix, std::ostream& str ) -{ +{ str << "\n Format: " << getMatrixFormat( matrix ) << std::endl; str << " Rows: " << matrix.getRows() << std::endl; str << " Cols: " << matrix.getColumns() << std::endl; @@ -81,67 +81,67 @@ benchmarkSpMV( Benchmark& benchmark, // Setup CSR for cuSPARSE. It will compared to the format given as a template parameter to this function typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix; typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; - + CSR_HostMatrix CSRhostMatrix; CSR_DeviceMatrix CSRdeviceMatrix; - + // Read the matrix for CSR, to set up cuSPARSE try - { + { if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) ) - { + { throw std::bad_alloc(); return false; } } - catch( std::bad_alloc e ) + catch( std::bad_alloc& e ) { e.what(); return false; } - + #ifdef HAVE_CUDA // cuSPARSE handle setup cusparseHandle_t cusparseHandle; cusparseCreate( &cusparseHandle ); - + // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device CSRdeviceMatrix = CSRhostMatrix; - + // Delete the CSRhostMatrix, so it doesn't take up unnecessary space CSRhostMatrix.reset(); - + // Initialize the cusparseCSR matrix. TNL::CusparseCSR< Real > cusparseCSR; cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle ); #endif - + // Setup the format which is given as a template parameter to this function typedef Matrix< Real, Devices::Host, int > HostMatrix; typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; typedef Containers::Vector< Real, Devices::Host, int > HostVector; typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector; - + HostMatrix hostMatrix; DeviceMatrix deviceMatrix; HostVector hostVector, hostVector2; CudaVector deviceVector, deviceVector2; - + // Load the format try - { + { if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) ) { throw std::bad_alloc(); return false; } } - catch( std::bad_alloc e ) + catch( std::bad_alloc& e ) { e.what(); return false; } - + // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), // because we need the matrix loaded first to get the rows and columns @@ -191,17 +191,17 @@ benchmarkSpMV( Benchmark& benchmark, benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); - + // Initialize the host vector to be compared. // (The values in hostVector2 will be reset when spmvCuda starts) HostVector resultHostVector2; resultHostVector2.setSize( hostVector2.getSize() ); resultHostVector2.setValue( 0.0 ); - + // Copy the values resultHostVector2 = hostVector2; - - // Setup cuSPARSE MetaData, since it has the same header as CSR, + + // Setup cuSPARSE MetaData, since it has the same header as CSR, // and therefore will not get its own headers (rows, cols, speedup etc.) in log. // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten. benchmark.setMetadataColumns( Benchmark::MetadataColumns({ @@ -211,7 +211,7 @@ benchmarkSpMV( Benchmark& benchmark, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", convertToString( "CSR-cuSPARSE" ) } } )); - + #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); @@ -220,51 +220,51 @@ benchmarkSpMV( Benchmark& benchmark, HostVector resultDeviceVector2; resultDeviceVector2.setSize( deviceVector2.getSize() ); resultDeviceVector2.setValue( 0.0 ); - + resultDeviceVector2 = deviceVector2; - + benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); - + HostVector resultcuSPARSEDeviceVector2; resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() ); resultcuSPARSEDeviceVector2.setValue( 0.0 ); - + resultcuSPARSEDeviceVector2 = deviceVector2; - + // Difference between GPU (curent format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 ); - + std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax ); std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm ); - + char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; - - + + // Difference between CPU and GPU results for the current format //Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 ); Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) ); //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 ); - + std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax ); std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm ); - + char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ]; char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ]; - + // Print result differences of CPU and GPU of current format std::cout << CPUxGPU_absMax << std::endl; std::cout << CPUxGPU_lpNorm << std::endl; - + // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif - + std::cout << std::endl; return true; } @@ -277,11 +277,11 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, bool verboseMR ) { bool result = true; - result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); + result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); - + // AdEllpack is broken // result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );