Skip to content
Snippets Groups Projects
Commit 2ebb1334 authored by Lukas Cejka's avatar Lukas Cejka Committed by Tomáš Oberhuber
Browse files

Implemented rough version of SpMV Benchmark for mtx files.

parent aa627012
No related branches found
No related tags found
1 merge request: !45 Matrices revision
...@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index > ...@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
template< typename Matrix > template< typename Matrix >
int setHostTestMatrix( Matrix& matrix, void printMatrixInfo( const String& inputFileName,
const int elementsPerRow ) const Matrix& matrix,
std::ostream& str )
{ {
const int size = matrix.getRows(); // Get only the name of the format from getType().
int elements( 0 ); std::string mtrxFullType = matrix.getType();
for( int row = 0; row < size; row++ ) { std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
int col = row - elementsPerRow / 2; std::string type = mtrxType.substr(mtrxType.find(':') + 2);
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 && str << "\n Format: " << type << std::endl;
col + element < size ) str << " Rows: " << matrix.getRows() << std::endl;
{ str << " Cols: " << matrix.getColumns() << std::endl;
matrix.setElement( row, col + element, element + 1 ); str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
elements++;
}
}
}
return elements;
}
#ifdef HAVE_CUDA
template< typename Matrix >
__global__ void setCudaTestMatrixKernel( Matrix* matrix,
const int elementsPerRow,
const int gridIdx )
{
const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx >= matrix->getRows() )
return;
int col = rowIdx - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 &&
col + element < matrix->getColumns() )
matrix->setElementFast( rowIdx, col + element, element + 1 );
}
}
#endif
template< typename Matrix >
void setCudaTestMatrix( Matrix& matrix,
const int elementsPerRow )
{
#ifdef HAVE_CUDA
typedef typename Matrix::IndexType IndexType;
typedef typename Matrix::RealType RealType;
Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
if( gridIdx == cudaGrids - 1 )
cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
setCudaTestMatrixKernel< Matrix >
<<< cudaGridSize, cudaBlockSize >>>
( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
TNL_CHECK_CUDA_DEVICE;
}
#endif
} }
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h // TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template< typename Real, template< typename Real,
template< typename, typename, typename > class Matrix, template< typename, typename, typename > class Matrix,
...@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark, ...@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
HostVector hostVector, hostVector2; HostVector hostVector, hostVector2;
CudaVector deviceVector, deviceVector2; CudaVector deviceVector, deviceVector2;
if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) ) try
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; {
else if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
{ {
#ifdef HAVE_CUDA
if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) )
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
#endif return false;
}
hostVector.setSize( hostMatrix.getColumns() ); }
hostVector2.setSize( hostMatrix.getRows() ); catch( std::bad_alloc )
{
#ifdef HAVE_CUDA std::cerr << "Not enough memory to read the matrix." << std::endl;
deviceVector.setSize( deviceMatrix.getColumns() ); return false;
deviceVector2.setSize( deviceMatrix.getRows() ); }
#endif printMatrixInfo( inputFileName, hostMatrix, std::cout );
#ifdef HAVE_CUDA
// reset function // FIXME: This doesn't work for ChunkedEllpack, because
auto reset = [&]() { // its cross-device assignment is not implemented yet.
hostVector.setValue( 1.0 ); deviceMatrix = hostMatrix;
hostVector2.setValue( 0.0 ); #endif
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 ); benchmark.setMetadataColumns( Benchmark::MetadataColumns({
deviceVector2.setValue( 0.0 ); { "rows", convertToString( hostMatrix.getRows() ) },
#endif { "columns", convertToString( hostMatrix.getColumns() ) }
}; } ));
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
#ifdef HAVE_CUDA
// compute functions deviceVector.setSize( hostMatrix.getColumns() );
auto spmvHost = [&]() { deviceVector2.setSize( hostMatrix.getRows() );
hostMatrix.vectorProduct( hostVector, hostVector2 ); #endif
};
auto spmvCuda = [&]() { // reset function
deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); auto reset = [&]() {
}; hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
benchmark.setOperation( datasetSize ); #ifdef HAVE_CUDA
benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); deviceVector.setValue( 1.0 );
#ifdef HAVE_CUDA deviceVector2.setValue( 0.0 );
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); #endif
#endif };
return true;
} const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
} }
template< typename Real = double, template< typename Real = double,
...@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, ...@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool result = true; bool result = true;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName ); result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow ); // result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
return result; return result;
} }
......
...@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark, ...@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
Benchmark::MetadataMap metadata, Benchmark::MetadataMap metadata,
const String & inputFileName ) const String & inputFileName )
{ {
// DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h) const String precision = getType< Real >();
metadata["precision"] = precision;
typedef Matrices::CSR< Real, Devices::Host, int > CSRType;
CSRType csrMatrix; // Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) ) metadata );
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl; benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
else
{
const std::size_t rows = csrMatrix.getRows();
const std::size_t cols = csrMatrix.getColumns();
const String precision = getType< Real >();
metadata["precision"] = precision;
// Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( rows ) },
{ "columns", convertToString( cols ) }
} ));
benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
} }
void void
...@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config ) ...@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
{ {
config.addDelimiter( "Benchmark settings:" ); config.addDelimiter( "Benchmark settings:" );
config.addRequiredEntry< String >( "input-file", "Input file name." ); config.addRequiredEntry< String >( "input-file", "Input file name." );
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log"); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log");
config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
config.addEntryEnum( "append" ); config.addEntryEnum( "append" );
config.addEntryEnum( "overwrite" ); config.addEntryEnum( "overwrite" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" ); config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" );
config.addEntryEnum( "float" ); config.addEntryEnum( "float" );
config.addEntryEnum( "double" ); config.addEntryEnum( "double" );
config.addEntryEnum( "all" ); config.addEntryEnum( "all" );
...@@ -110,11 +94,6 @@ main( int argc, char* argv[] ) ...@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
const String & logFileName = parameters.getParameter< String >( "log-file" ); const String & logFileName = parameters.getParameter< String >( "log-file" );
const String & outputMode = parameters.getParameter< String >( "output-mode" ); const String & outputMode = parameters.getParameter< String >( "output-mode" );
const String & precision = parameters.getParameter< String >( "precision" ); const String & precision = parameters.getParameter< String >( "precision" );
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const int loops = parameters.getParameter< int >( "loops" ); const int loops = parameters.getParameter< int >( "loops" );
const int verbose = parameters.getParameter< int >( "verbose" ); const int verbose = parameters.getParameter< int >( "verbose" );
...@@ -142,6 +121,6 @@ main( int argc, char* argv[] ) ...@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
return EXIT_FAILURE; return EXIT_FAILURE;
} }
std::cout << "== BENCHMARK FINISHED ==" << std::endl; std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment