Newer
Older
Lukas Cejka
committed
/***************************************************************************
spmv.h - description
-------------------
Lukas Cejka
committed
copyright : (C) 2015 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Original implemented by J. Klinkovsky in Benchmarks/BLAS
// This is an edited copy of Benchmarks/BLAS/spmv.h by: Lukas Cejka
Lukas Cejka
committed
#pragma once
#include "../Benchmarks.h"
#include "SpmvBenchmarkResult.h"
Lukas Cejka
committed
#include <TNL/Pointers/DevicePointer.h>
Tomáš Oberhuber
committed
#include <TNL/Matrices/Legacy/CSR.h>
#include <TNL/Matrices/Legacy/Ellpack.h>
#include <TNL/Matrices/Legacy/SlicedEllpack.h>
#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
#include <TNL/Matrices/Legacy/AdEllpack.h>
#include <TNL/Matrices/Legacy/BiEllpack.h>
Lukas Cejka
committed
#include <TNL/Matrices/MatrixReader.h>
#include <TNL/Matrices/MatrixInfo.h>
Tomáš Oberhuber
committed
#include <TNL/Matrices/MatrixType.h>
#include <TNL/Containers/Segments/CSR.h>
#include <TNL/Containers/Segments/Ellpack.h>
#include <TNL/Containers/Segments/SlicedEllpack.h>
Lukas Cejka
committed
using namespace TNL::Matrices;
Lukas Cejka
committed
#include "cusparseCSRMatrix.h"
Lukas Cejka
committed
namespace TNL {
namespace Benchmarks {
namespace SpMVLegacy {
Lukas Cejka
committed
// Alias to match the number of template parameters with other formats
Lukas Cejka
committed
template< typename Real, typename Device, typename Index >
using SlicedEllpackAlias = Matrices::Legacy::SlicedEllpack< Real, Device, Index >;
Lukas Cejka
committed
// Segments based sparse matrix aliases
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >;
template< typename Device, typename Index, typename IndexAllocator >
using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >;
template< typename Device, typename Index, typename IndexAllocator >
using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;
// Get the name (with extension) of input matrix file
std::string getMatrixFileName( const String& InputFileName )
{
std::string fileName = InputFileName;
const size_t last_slash_idx = fileName.find_last_of( "/\\" );
if( std::string::npos != last_slash_idx )
fileName.erase( 0, last_slash_idx + 1 );
return fileName;
}
Lukas Cejka
committed
// Get only the name of the format from getType()
Lukas Cejka
committed
template< typename Matrix >
std::string getMatrixFormat( const Matrix& matrix )
Lukas Cejka
committed
{
std::string mtrxFullType = getType( matrix );
std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );
std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );
return format;
}
template< typename Matrix >
std::string getFormatShort( const Matrix& matrix )
{
std::string mtrxFullType = getType( matrix );
std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );
std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );
format = format.substr( format.find(':') + 2);
format = format.substr( 0, 3 );
return format;
}
// Print information about the matrix.
template< typename Matrix >
void printMatrixInfo( const Matrix& matrix,
std::ostream& str )
str << "\n Format: " << getMatrixFormat( matrix ) << std::endl;
str << " Rows: " << matrix.getRows() << std::endl;
str << " Cols: " << matrix.getColumns() << std::endl;
str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
Lukas Cejka
committed
}
template< typename Real,
template< typename, typename, typename > class Matrix,
template< typename, typename, typename, typename > class Vector = Containers::Vector >
const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
Lukas Cejka
committed
bool verboseMR )
Lukas Cejka
committed
{
using HostMatrix = Matrix< Real, Devices::Host, int >;
using CudaMatrix = Matrix< Real, Devices::Cuda, int >;
using HostVector = Containers::Vector< Real, Devices::Host, int >;
using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
HostMatrix hostMatrix;
MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
{ "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) },
{ "matrix format", MatrixInfo< HostMatrix >::getFormat() }
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
benchmark.setOperation( datasetSize );
/***
* Benchmark SpMV on host
*/
HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
auto resetHostVectors = [&]() {
hostInVector = 1.0;
hostOutVector = 0.0;
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostInVector, hostOutVector );
SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
/***
* Benchmark SpMV on CUDA
*/
#ifdef HAVE_CUDA
cudaMatrix = hostMatrix;
CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
auto resetCudaVectors = [&]() {
cudaInVector = 1.0;
cudaOutVector = 0.0;
};
auto spmvCuda = [&]() {
cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
};
SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
std::cout << std::endl;
Lukas Cejka
committed
}
template< typename Real = double,
typename Index = int >
Lukas Cejka
committed
const String& inputFileName,
bool verboseMR )
Lukas Cejka
committed
{
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
using CSRHostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
using CSRCudaMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;
using HostVector = Containers::Vector< Real, Devices::Host, int >;
using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
CSRHostMatrix csrHostMatrix;
CSRCudaMatrix csrCudaMatrix;
////
// Set-up benchmark datasize
//
MatrixReader< CSRHostMatrix >::readMtxFile( inputFileName, csrHostMatrix, verboseMR );
const int elements = csrHostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
benchmark.setOperation( datasetSize );
////
// Perform benchmark on host with CSR as a reference CPU format
//
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "CSR" ) }
} ));
HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
auto resetHostVectors = [&]() {
hostInVector = 1.0;
hostOutVector == 0.0;
};
auto spmvCSRHost = [&]() {
csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
};
benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost );
////
// Perform benchmark on CUDA device with cuSparse as a reference GPU format
//
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "cuSparse" ) }
} ));
cusparseHandle_t cusparseHandle;
cusparseCreate( &cusparseHandle );
csrCudaMatrix = csrHostMatrix;
// Delete the CSRhostMatrix, so it doesn't take up unnecessary space
csrHostMatrix.reset();
TNL::CusparseCSR< Real > cusparseMatrix;
cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
auto resetCusparseVectors = [&]() {
cusparseInVector = 1.0;
cusparseOutVector == 0.0;
};
auto spmvCusparse = [&]() {
cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
};
benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
/* AdEllpack is broken
benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
*/
Lukas Cejka
committed
}
Lukas Cejka
committed
} // namespace Benchmarks
} // namespace TNL