From 6365a1368cf6c6ca679078dd262208ecba709884 Mon Sep 17 00:00:00 2001 From: Lukas Cejka <lukas.ostatek@gmail.com> Date: Sat, 9 Mar 2019 18:40:47 +0100 Subject: [PATCH] Implemented rough version of result comparison. Implemented benchmark for comparison of TNL CSR and Cusparse on GPU. Edited log file formatting. --- src/Benchmarks/SpMV/spmv.h | 245 ++++++++++++++++++++++++++++++++++++- 1 file changed, 241 insertions(+), 4 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 1ea7a26682..32d41bb6a3 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -23,6 +23,10 @@ #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; +#include <cusparse.h> +#include "cusparseCSRMatrix.h" +using namespace TNL; + namespace TNL { namespace Benchmarks { @@ -72,7 +76,7 @@ benchmarkSpMV( Benchmark & benchmark, { // Start a buffer to capture the output of MatrixReader std::stringstream buffer; - std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf()); + std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() ); if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) ) { @@ -86,7 +90,7 @@ benchmarkSpMV( Benchmark & benchmark, //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string std::stringstream buffer; - std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf()); + std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() ); MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ); @@ -106,7 +110,7 @@ benchmarkSpMV( Benchmark & benchmark, stringErrorMsg.erase( stringErrorMsg.length() - 1 ); // https://stackoverflow.com/questions/7352099/stdstring-to-char - char* errorMsg = &stringErrorMsg[0u]; + char* errorMsg = &stringErrorMsg[ 0u ]; // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as @@ -147,7 +151,7 @@ benchmarkSpMV( Benchmark & benchmark, stringErrorMsg.erase( stringErrorMsg.length() - 1 ); // https://stackoverflow.com/questions/7352099/stdstring-to-char - char *errorMsg = &stringErrorMsg[0u]; + char *errorMsg = &stringErrorMsg[ 0u ]; // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as // a prefix in the log file. @@ -206,10 +210,239 @@ benchmarkSpMV( Benchmark & benchmark, benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); + + // Initialize the host vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts) + HostVector resultHostVector2; + resultHostVector2.setSize( hostVector2.getSize() ); + resultHostVector2.setValue( 0.0 ); + // Copy the values + for( int i = 0; i < hostVector2.getSize(); i++ ) + resultHostVector2.setElement( i, hostVector2.getElement( i ) ); + + #ifdef HAVE_CUDA + benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); + #endif + + // Setup the device vector to be compared + HostVector resultDeviceVector2; + resultDeviceVector2.setSize( hostVector2.getSize() ); + resultDeviceVector2.setValue( 0.0 ); + +// resultDeviceVector2 += deviceVector2; // Throws a segfault. + + // Copy the values + for( int i = 0; i < deviceVector2.getSize(); i++ ) + resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) ); + + Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 ); + Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 ); + + std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax ); + std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm ); + + char *absMax = &resultDifferenceAbsMax[ 0u ]; + char *lpNorm = &resultDifferenceLpNorm[ 0u ]; + + // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG +// benchmark.addErrorMessage( absMax, 1 ); +// benchmark.addErrorMessage( lpNorm, 1 ); + + std::cout << std::endl; + return true; +} + +// Compares only CSR on GPU and Cusparse on GPU. +template< typename Real, + template< typename, typename, typename > class Vector = Containers::Vector > +bool +benchmarkCusparseSpMV( Benchmark & benchmark, + const String & inputFileName ) +{ + typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix; + typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix; + typedef Containers::Vector< Real, Devices::Host, int > HostVector; + typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector; + + CSR_HostMatrix CSRhostMatrix; + CSR_DeviceMatrix CSRdeviceMatrix; + CudaVector deviceVector, deviceVector2; + + try + { + // Start a buffer to capture the output of MatrixReader + std::stringstream buffer; + std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() ); + + if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix ) ) + { + // Capture the original output of MatrixReader, so it isn't printed by console. + std::string errorMsgBuffer = buffer.str(); + // Reset the buffer + std::cerr.rdbuf( old ); + + + std::string matrixFormat = getMatrixFormat( CSRhostMatrix ); + + //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string + std::stringstream buffer; + std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() ); + + MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix ); + + errorMsgBuffer = buffer.str(); + + // Reset the buffer + std::cerr.rdbuf( old ); + + std::string stringErrorMsg = "Benchmark failed: Unable to read the matrix.\n" + "matrix format: " + matrixFormat + + "\nFailed to read the matrix file " + + ( std::string )inputFileName + ".\n" + + errorMsgBuffer; + + //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string + if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' ) + stringErrorMsg.erase( stringErrorMsg.length() - 1 ); + + // https://stackoverflow.com/questions/7352099/stdstring-to-char + char* errorMsg = &stringErrorMsg[ 0u ]; + + + // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as + // a prefix in the log file. + // (Try adding more benchmarks in benchmarkSpmvSynthetic(...) + // and you'll see) + benchmark.addErrorMessage( errorMsg, 1 ); + + std::cout << std::endl; + + return false; + } + std::cerr.rdbuf( old ); + } + catch( std::bad_alloc ) + { + std::string matrixFormat = getMatrixFormat( CSRhostMatrix ); + + //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string + std::stringstream buffer; + std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf()); + + MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix ); + + std::string errorMsgBuffer = buffer.str(); + + // Reset the buffer + std::cerr.rdbuf( old ); + + std::string stringErrorMsg = "Benchmark failed: Not enough memory.\n" + "matrix format: " + matrixFormat + + "\nFailed to allocate memory to read the matrix file " + + ( std::string )inputFileName + ".\n" + + errorMsgBuffer; + + //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string + if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' ) + stringErrorMsg.erase( stringErrorMsg.length() - 1 ); + + // https://stackoverflow.com/questions/7352099/stdstring-to-char + char *errorMsg = &stringErrorMsg[ 0u ]; + + // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as + // a prefix in the log file. + // (Try adding more benchmarks in benchmarkSpmvSynthetic(...) + // and you'll see) + benchmark.addErrorMessage( errorMsg, 1 ); + + std::cout << std::endl; + + return false; + } + + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "matrix format", convertToString( getMatrixFormat( CSRhostMatrix ) ) }, + { "non-zeros", convertToString( CSRhostMatrix.getNumberOfNonzeroMatrixElements() ) }, + { "rows", convertToString( CSRhostMatrix.getRows() ) }, + { "columns", convertToString( CSRhostMatrix.getColumns() ) } + } )); + + cusparseHandle_t cusparseHandle; + cusparseCreate( &cusparseHandle ); + +#ifdef HAVE_CUDA + // FIXME: This doesn't work for ChunkedEllpack, because + // its cross-device assignment is not implemented yet + CSRdeviceMatrix = CSRhostMatrix; + + TNL::CusparseCSR< Real > cusparseCSR; + cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle ); +#endif + +#ifdef HAVE_CUDA + deviceVector.setSize( CSRhostMatrix.getColumns() ); + deviceVector2.setSize( CSRhostMatrix.getRows() ); +#endif + + // reset function + auto reset = [&]() { + #ifdef HAVE_CUDA + deviceVector.setValue( 1.0 ); + deviceVector2.setValue( 0.0 ); + #endif + }; + + const int elements = CSRhostMatrix.getNumberOfNonzeroMatrixElements(); + + const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; + + // compute functions + auto spmvCuda = [&]() { + CSRdeviceMatrix.vectorProduct( deviceVector, deviceVector2 ); + }; + auto spmvCusparse = [&]() { + cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); + }; + + benchmark.setOperation( datasetSize ); + #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); + + // Initialize the cuda vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts) + HostVector resultCusparseVector2; + resultCusparseVector2.setSize( deviceVector2.getSize() ); + resultCusparseVector2.setValue( 0.0 ); + // Copy the values + for( int i = 0; i < deviceVector2.getSize(); i++ ) + resultCusparseVector2.setElement( i, deviceVector2.getElement( i ) ); + + benchmark.time< Devices::Cuda >( reset, "GPU-Cusparse", spmvCusparse ); #endif + + // Setup the device vector to be compared + HostVector resultDeviceVector2; + resultDeviceVector2.setSize( resultCusparseVector2.getSize() ); + resultDeviceVector2.setValue( 0.0 ); + + // Copy the values + for( int i = 0; i < deviceVector2.getSize(); i++ ) + resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) ); + + Real differenceAbsMax = resultCusparseVector2.differenceAbsMax( resultDeviceVector2 ); + Real differenceLpNorm = resultCusparseVector2.differenceLpNorm( resultDeviceVector2, 1 ); + + std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax ); + std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm ); + + char *absMax = &resultDifferenceAbsMax[ 0u ]; + char *lpNorm = &resultDifferenceLpNorm[ 0u ]; + + // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG +// benchmark.addErrorMessage( absMax, 1 ); +// benchmark.addErrorMessage( lpNorm, 1 ); + std::cout << std::endl; + cusparseDestroy( cusparseHandle ); return true; } @@ -222,6 +455,10 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); + + // This doesn't have a titles (matrix format, rows, cols, etc.) in the output, because the header is the same as before (CSR). + result |= benchmarkCusparseSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); + result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName ); -- GitLab