Skip to content
Snippets Groups Projects
Commit 6365a136 authored by Lukas Cejka's avatar Lukas Cejka Committed by Tomáš Oberhuber
Browse files

Implemented rough version of result comparison. Implemented benchmark for...

Implemented rough version of result comparison. Implemented benchmark for comparison of TNL CSR and Cusparse on GPU. Edited log file formatting.
parent 2a405524
No related branches found
No related tags found
1 merge request!45Matrices revision
......@@ -23,6 +23,10 @@
#include <TNL/Matrices/MatrixReader.h>
using namespace TNL::Matrices;
#include <cusparse.h>
#include "cusparseCSRMatrix.h"
using namespace TNL;
namespace TNL {
namespace Benchmarks {
......@@ -72,7 +76,7 @@ benchmarkSpMV( Benchmark & benchmark,
{
// Start a buffer to capture the output of MatrixReader
std::stringstream buffer;
std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
{
......@@ -86,7 +90,7 @@ benchmarkSpMV( Benchmark & benchmark,
//https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
std::stringstream buffer;
std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix );
......@@ -106,7 +110,7 @@ benchmarkSpMV( Benchmark & benchmark,
stringErrorMsg.erase( stringErrorMsg.length() - 1 );
// https://stackoverflow.com/questions/7352099/stdstring-to-char
char* errorMsg = &stringErrorMsg[0u];
char* errorMsg = &stringErrorMsg[ 0u ];
// FIXME: Every other benchmark, the errorMsg doesn't have a "!" as
......@@ -147,7 +151,7 @@ benchmarkSpMV( Benchmark & benchmark,
stringErrorMsg.erase( stringErrorMsg.length() - 1 );
// https://stackoverflow.com/questions/7352099/stdstring-to-char
char *errorMsg = &stringErrorMsg[0u];
char *errorMsg = &stringErrorMsg[ 0u ];
// FIXME: Every other benchmark, the errorMsg doesn't have a "!" as
// a prefix in the log file.
......@@ -206,10 +210,239 @@ benchmarkSpMV( Benchmark & benchmark,
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
// Initialize the host vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts)
HostVector resultHostVector2;
resultHostVector2.setSize( hostVector2.getSize() );
resultHostVector2.setValue( 0.0 );
// Copy the values
for( int i = 0; i < hostVector2.getSize(); i++ )
resultHostVector2.setElement( i, hostVector2.getElement( i ) );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
// Setup the device vector to be compared
HostVector resultDeviceVector2;
resultDeviceVector2.setSize( hostVector2.getSize() );
resultDeviceVector2.setValue( 0.0 );
// resultDeviceVector2 += deviceVector2; // Throws a segfault.
// Copy the values
for( int i = 0; i < deviceVector2.getSize(); i++ )
resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) );
Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 );
Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 );
std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );
char *absMax = &resultDifferenceAbsMax[ 0u ];
char *lpNorm = &resultDifferenceLpNorm[ 0u ];
// FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
// benchmark.addErrorMessage( absMax, 1 );
// benchmark.addErrorMessage( lpNorm, 1 );
std::cout << std::endl;
return true;
}
// Compares only CSR on GPU and Cusparse on GPU.
template< typename Real,
template< typename, typename, typename > class Vector = Containers::Vector >
bool
benchmarkCusparseSpMV( Benchmark & benchmark,
const String & inputFileName )
{
typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix;
typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix;
typedef Containers::Vector< Real, Devices::Host, int > HostVector;
typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
CSR_HostMatrix CSRhostMatrix;
CSR_DeviceMatrix CSRdeviceMatrix;
CudaVector deviceVector, deviceVector2;
try
{
// Start a buffer to capture the output of MatrixReader
std::stringstream buffer;
std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix ) )
{
// Capture the original output of MatrixReader, so it isn't printed by console.
std::string errorMsgBuffer = buffer.str();
// Reset the buffer
std::cerr.rdbuf( old );
std::string matrixFormat = getMatrixFormat( CSRhostMatrix );
//https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
std::stringstream buffer;
std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix );
errorMsgBuffer = buffer.str();
// Reset the buffer
std::cerr.rdbuf( old );
std::string stringErrorMsg = "Benchmark failed: Unable to read the matrix.\n"
"matrix format: " + matrixFormat +
"\nFailed to read the matrix file " +
( std::string )inputFileName + ".\n" +
errorMsgBuffer;
//https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
stringErrorMsg.erase( stringErrorMsg.length() - 1 );
// https://stackoverflow.com/questions/7352099/stdstring-to-char
char* errorMsg = &stringErrorMsg[ 0u ];
// FIXME: Every other benchmark, the errorMsg doesn't have a "!" as
// a prefix in the log file.
// (Try adding more benchmarks in benchmarkSpmvSynthetic(...)
// and you'll see)
benchmark.addErrorMessage( errorMsg, 1 );
std::cout << std::endl;
return false;
}
std::cerr.rdbuf( old );
}
catch( std::bad_alloc )
{
std::string matrixFormat = getMatrixFormat( CSRhostMatrix );
//https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
std::stringstream buffer;
std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix );
std::string errorMsgBuffer = buffer.str();
// Reset the buffer
std::cerr.rdbuf( old );
std::string stringErrorMsg = "Benchmark failed: Not enough memory.\n"
"matrix format: " + matrixFormat +
"\nFailed to allocate memory to read the matrix file " +
( std::string )inputFileName + ".\n" +
errorMsgBuffer;
//https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
stringErrorMsg.erase( stringErrorMsg.length() - 1 );
// https://stackoverflow.com/questions/7352099/stdstring-to-char
char *errorMsg = &stringErrorMsg[ 0u ];
// FIXME: Every other benchmark, the errorMsg doesn't have a "!" as
// a prefix in the log file.
// (Try adding more benchmarks in benchmarkSpmvSynthetic(...)
// and you'll see)
benchmark.addErrorMessage( errorMsg, 1 );
std::cout << std::endl;
return false;
}
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix format", convertToString( getMatrixFormat( CSRhostMatrix ) ) },
{ "non-zeros", convertToString( CSRhostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( CSRhostMatrix.getRows() ) },
{ "columns", convertToString( CSRhostMatrix.getColumns() ) }
} ));
cusparseHandle_t cusparseHandle;
cusparseCreate( &cusparseHandle );
#ifdef HAVE_CUDA
// FIXME: This doesn't work for ChunkedEllpack, because
// its cross-device assignment is not implemented yet
CSRdeviceMatrix = CSRhostMatrix;
TNL::CusparseCSR< Real > cusparseCSR;
cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle );
#endif
#ifdef HAVE_CUDA
deviceVector.setSize( CSRhostMatrix.getColumns() );
deviceVector2.setSize( CSRhostMatrix.getRows() );
#endif
// reset function
auto reset = [&]() {
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = CSRhostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvCuda = [&]() {
CSRdeviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
auto spmvCusparse = [&]() {
cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
// Initialize the cuda vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts)
HostVector resultCusparseVector2;
resultCusparseVector2.setSize( deviceVector2.getSize() );
resultCusparseVector2.setValue( 0.0 );
// Copy the values
for( int i = 0; i < deviceVector2.getSize(); i++ )
resultCusparseVector2.setElement( i, deviceVector2.getElement( i ) );
benchmark.time< Devices::Cuda >( reset, "GPU-Cusparse", spmvCusparse );
#endif
// Setup the device vector to be compared
HostVector resultDeviceVector2;
resultDeviceVector2.setSize( resultCusparseVector2.getSize() );
resultDeviceVector2.setValue( 0.0 );
// Copy the values
for( int i = 0; i < deviceVector2.getSize(); i++ )
resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) );
Real differenceAbsMax = resultCusparseVector2.differenceAbsMax( resultDeviceVector2 );
Real differenceLpNorm = resultCusparseVector2.differenceLpNorm( resultDeviceVector2, 1 );
std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );
char *absMax = &resultDifferenceAbsMax[ 0u ];
char *lpNorm = &resultDifferenceLpNorm[ 0u ];
// FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
// benchmark.addErrorMessage( absMax, 1 );
// benchmark.addErrorMessage( lpNorm, 1 );
std::cout << std::endl;
cusparseDestroy( cusparseHandle );
return true;
}
......@@ -222,6 +455,10 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool result = true;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
// This doesn't have a titles (matrix format, rows, cols, etc.) in the output, because the header is the same as before (CSR).
result |= benchmarkCusparseSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment