Commit 2ebb1334 authored by Lukas Cejka, committed by Tomáš Oberhuber

Implemented a rough version of the SpMV benchmark for mtx files.

parent aa627012
Merge request !45: Matrices revision
@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
template< typename Matrix >
int setHostTestMatrix( Matrix& matrix,
const int elementsPerRow )
void printMatrixInfo( const String& inputFileName,
const Matrix& matrix,
std::ostream& str )
{
const int size = matrix.getRows();
int elements( 0 );
for( int row = 0; row < size; row++ ) {
int col = row - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 &&
col + element < size )
{
matrix.setElement( row, col + element, element + 1 );
elements++;
}
}
}
return elements;
}
#ifdef HAVE_CUDA
template< typename Matrix >
__global__ void setCudaTestMatrixKernel( Matrix* matrix,
const int elementsPerRow,
const int gridIdx )
{
const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx >= matrix->getRows() )
return;
int col = rowIdx - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 &&
col + element < matrix->getColumns() )
matrix->setElementFast( rowIdx, col + element, element + 1 );
}
}
#endif
template< typename Matrix >
void setCudaTestMatrix( Matrix& matrix,
const int elementsPerRow )
{
#ifdef HAVE_CUDA
typedef typename Matrix::IndexType IndexType;
typedef typename Matrix::RealType RealType;
Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
if( gridIdx == cudaGrids - 1 )
cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
setCudaTestMatrixKernel< Matrix >
<<< cudaGridSize, cudaBlockSize >>>
( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
TNL_CHECK_CUDA_DEVICE;
}
#endif
// Get only the name of the format from getType().
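// E.g. a hypothetical full type string "Matrices::CSR< double, Devices::Host, int >"
// would be reduced to just "CSR" by the two substr() calls below.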
std::string mtrxFullType = matrix.getType();
std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
std::string type = mtrxType.substr(mtrxType.find(':') + 2);
str << "\n Format: " << type << std::endl;
str << " Rows: " << matrix.getRows() << std::endl;
str << " Cols: " << matrix.getColumns() << std::endl;
str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
}
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template< typename Real,
template< typename, typename, typename > class Matrix,
@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
HostVector hostVector, hostVector2;
CudaVector deviceVector, deviceVector2;
if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) )
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
else
{
#ifdef HAVE_CUDA
if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) )
try
{
if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
{
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
#endif
hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
#ifdef HAVE_CUDA
deviceVector.setSize( deviceMatrix.getColumns() );
deviceVector2.setSize( deviceMatrix.getRows() );
#endif
// reset function
auto reset = [&]() {
hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
}
return false;
}
}
catch( const std::bad_alloc& )
{
std::cerr << "Not enough memory to read the matrix." << std::endl;
return false;
}
printMatrixInfo( inputFileName, hostMatrix, std::cout );
#ifdef HAVE_CUDA
// FIXME: This doesn't work for ChunkedEllpack, because
// its cross-device assignment is not implemented yet.
deviceMatrix = hostMatrix;
#endif
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) }
} ));
hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
#ifdef HAVE_CUDA
deviceVector.setSize( hostMatrix.getColumns() );
deviceVector2.setSize( hostMatrix.getRows() );
#endif
// reset function: reinitialize the input/output vectors before each timed run
auto reset = [&]() {
hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
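// Dataset size estimate (an assumption about what this benchmark counts): for each
// nonzero element, one matrix value and one input-vector element (Real each) plus
// one column index (int) are transferred; the total is converted to GB below.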
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
}
template< typename Real = double,
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool result = true;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow );
// result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow );
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
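// NOTE: ChunkedEllpack is left commented out; its cross-device assignment is not
// implemented yet (see the FIXME in benchmarkSpMV above).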
return result;
}
@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
Benchmark::MetadataMap metadata,
const String & inputFileName )
{
// TODO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)
typedef Matrices::CSR< Real, Devices::Host, int > CSRType;
CSRType csrMatrix;
if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) )
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
else
{
const std::size_t rows = csrMatrix.getRows();
const std::size_t cols = csrMatrix.getColumns();
const String precision = getType< Real >();
metadata["precision"] = precision;
// Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( rows ) },
{ "columns", convertToString( cols ) }
} ));
benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
const String precision = getType< Real >();
metadata["precision"] = precision;
// Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
void
@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
{
config.addDelimiter( "Benchmark settings:" );
config.addRequiredEntry< String >( "input-file", "Input file name." );
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log");
config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
config.addEntryEnum( "append" );
config.addEntryEnum( "overwrite" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" );
config.addEntryEnum( "float" );
config.addEntryEnum( "double" );
config.addEntryEnum( "all" );
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
const String & logFileName = parameters.getParameter< String >( "log-file" );
const String & outputMode = parameters.getParameter< String >( "output-mode" );
const String & precision = parameters.getParameter< String >( "precision" );
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const int loops = parameters.getParameter< int >( "loops" );
const int verbose = parameters.getParameter< int >( "verbose" );
@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
return EXIT_FAILURE;
}
std::cout << "== BENCHMARK FINISHED ==" << std::endl;
std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
return EXIT_SUCCESS;
}