Newer
Older
Lukas Cejka
committed
/***************************************************************************
spmv.h - description
-------------------
begin : Dec 30, 2015
copyright : (C) 2015 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include "../Benchmarks.h"
#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Matrices/CSR.h>
#include <TNL/Matrices/Ellpack.h>
#include <TNL/Matrices/SlicedEllpack.h>
#include <TNL/Matrices/ChunkedEllpack.h>
Lukas Cejka
committed
#include <TNL/Matrices/MatrixReader.h>
using namespace TNL::Matrices;
Lukas Cejka
committed
Lukas Cejka
committed
namespace TNL {
namespace Benchmarks {
// silly alias to match the number of template parameters with other formats
template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
template< typename Matrix >
void printMatrixInfo( const String& inputFileName,
const Matrix& matrix,
std::ostream& str )
Lukas Cejka
committed
{
// Get only the name of the format from getType().
std::string mtrxFullType = matrix.getType();
std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
std::string type = mtrxType.substr(mtrxType.find(':') + 2);
str << "\n Format: " << type << std::endl;
str << " Rows: " << matrix.getRows() << std::endl;
str << " Cols: " << matrix.getColumns() << std::endl;
str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
Lukas Cejka
committed
}
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template< typename Real,
template< typename, typename, typename > class Matrix,
template< typename, typename, typename > class Vector = Containers::Vector >
bool
benchmarkSpMV( Benchmark & benchmark,
Lukas Cejka
committed
const String & inputFileName )
Lukas Cejka
committed
{
Lukas Cejka
committed
typedef Matrix< Real, Devices::Host, int > HostMatrix;
typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
typedef Containers::Vector< Real, Devices::Host, int > HostVector;
typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
HostMatrix hostMatrix;
DeviceMatrix deviceMatrix;
HostVector hostVector, hostVector2;
CudaVector deviceVector, deviceVector2;
try
{
if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
{
Lukas Cejka
committed
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
return false;
}
}
catch( std::bad_alloc )
{
std::cerr << "Not enough memory to read the matrix." << std::endl;
return false;
}
printMatrixInfo( inputFileName, hostMatrix, std::cout );
#ifdef HAVE_CUDA
// FIXME: This doesn't work for ChunkedEllpack, because
// its cross-device assignment is not implemented yet.
deviceMatrix = hostMatrix;
#endif
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) }
} ));
hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
#ifdef HAVE_CUDA
deviceVector.setSize( hostMatrix.getColumns() );
deviceVector2.setSize( hostMatrix.getRows() );
#endif
// reset function
auto reset = [&]() {
hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
Lukas Cejka
committed
}
template< typename Real = double,
typename Index = int >
bool
benchmarkSpmvSynthetic( Benchmark & benchmark,
Lukas Cejka
committed
const String& inputFileName )
Lukas Cejka
committed
{
bool result = true;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
Lukas Cejka
committed
result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
Lukas Cejka
committed
return result;
}
} // namespace Benchmarks
} // namespace TNL