Newer
Older
Lukas Cejka
committed
/***************************************************************************
spmv.h - description
-------------------
Lukas Cejka
committed
copyright : (C) 2015 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Original implemented by J. Klinkovsky in Benchmarks/BLAS
// This is an edited copy of Benchmarks/BLAS/spmv.h by: Lukas Cejka
Lukas Cejka
committed
#pragma once
#include "../Benchmarks.h"
#include "SpmvBenchmarkResult.h"
Lukas Cejka
committed
#include <TNL/Pointers/DevicePointer.h>
Tomáš Oberhuber
committed
#include <TNL/Matrices/Legacy/CSR.h>
#include <TNL/Matrices/Legacy/Ellpack.h>
#include <TNL/Matrices/Legacy/SlicedEllpack.h>
#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
#include <TNL/Matrices/Legacy/AdEllpack.h>
#include <TNL/Matrices/Legacy/BiEllpack.h>
Lukas Cejka
committed
#include <TNL/Matrices/MatrixReader.h>
#include <TNL/Matrices/MatrixInfo.h>
Tomáš Oberhuber
committed
#include <TNL/Matrices/MatrixType.h>
#include <TNL/Containers/Segments/CSR.h>
#include <TNL/Containers/Segments/Ellpack.h>
#include <TNL/Containers/Segments/SlicedEllpack.h>
Lukas Cejka
committed
using namespace TNL::Matrices;
Lukas Cejka
committed
#include "cusparseCSRMatrix.h"
Lukas Cejka
committed
namespace TNL {
namespace Benchmarks {
namespace SpMVLegacy {
Lukas Cejka
committed
// Alias to match the number of template parameters with other formats
Lukas Cejka
committed
template< typename Real, typename Device, typename Index >
using SlicedEllpackAlias = Matrices::Legacy::SlicedEllpack< Real, Device, Index >;
Lukas Cejka
committed
// Segments based sparse matrix aliases
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >;
template< typename Device, typename Index, typename IndexAllocator >
using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >;
template< typename Device, typename Index, typename IndexAllocator >
using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
template< typename Real, typename Device, typename Index >
Tomáš Oberhuber
committed
using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;
// Legacy formats
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Scalar = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Vector = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRVector >;
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Light = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRLight >;
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Adaptive = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRAdaptive >;
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Stream = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRStream >;
// Get the name (with extension) of input matrix file
std::string getMatrixFileName( const String& InputFileName )
{
std::string fileName = InputFileName;
const size_t last_slash_idx = fileName.find_last_of( "/\\" );
if( std::string::npos != last_slash_idx )
fileName.erase( 0, last_slash_idx + 1 );
return fileName;
}
Lukas Cejka
committed
// Get only the name of the format from getType()
Lukas Cejka
committed
template< typename Matrix >
std::string getMatrixFormat( const Matrix& matrix )
Lukas Cejka
committed
{
std::string mtrxFullType = getType( matrix );
std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );
std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );
return format;
}
template< typename Matrix >
std::string getFormatShort( const Matrix& matrix )
{
std::string mtrxFullType = getType( matrix );
std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );
std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );
format = format.substr( format.find(':') + 2);
format = format.substr( 0, 3 );
return format;
}
// Print information about the matrix.
template< typename Matrix >
void printMatrixInfo( const Matrix& matrix,
std::ostream& str )
str << "\n Format: " << getMatrixFormat( matrix ) << std::endl;
str << " Rows: " << matrix.getRows() << std::endl;
str << " Cols: " << matrix.getColumns() << std::endl;
str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
Lukas Cejka
committed
}
template< typename Real,
template< typename, typename, typename > class Matrix,
template< typename, typename, typename, typename > class Vector = Containers::Vector >
const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
Lukas Cejka
committed
bool verboseMR )
Lukas Cejka
committed
{
using HostMatrix = Matrix< Real, Devices::Host, int >;
using CudaMatrix = Matrix< Real, Devices::Cuda, int >;
using HostVector = Containers::Vector< Real, Devices::Host, int >;
using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
HostMatrix hostMatrix;
MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) },
{ "matrix format", MatrixInfo< HostMatrix >::getFormat() }
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
benchmark.setOperation( datasetSize );
/***
* Benchmark SpMV on host
*/
HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
auto resetHostVectors = [&]() {
hostInVector = 1.0;
hostOutVector = 0.0;
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostInVector, hostOutVector );
SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
/***
* Benchmark SpMV on CUDA
*/
#ifdef HAVE_CUDA
cudaMatrix = hostMatrix;
CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
auto resetCudaVectors = [&]() {
cudaInVector = 1.0;
cudaOutVector = 0.0;
};
auto spmvCuda = [&]() {
cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
};
SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
std::cout << std::endl;
Lukas Cejka
committed
}
template< typename Real = double,
typename Index = int >
Lukas Cejka
committed
const String& inputFileName,
bool verboseMR )
Lukas Cejka
committed
{
using CSRHostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
using CSRCudaMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;
using HostVector = Containers::Vector< Real, Devices::Host, int >;
using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
CSRHostMatrix csrHostMatrix;
CSRCudaMatrix csrCudaMatrix;
////
// Set-up benchmark datasize
//
MatrixReader< CSRHostMatrix >::readMtxFile( inputFileName, csrHostMatrix, verboseMR );
const int elements = csrHostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
benchmark.setOperation( datasetSize );
////
// Perform benchmark on host with CSR as a reference CPU format
//
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "CSR" ) }
} ));
HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
auto resetHostVectors = [&]() {
hostInVector = 1.0;
hostOutVector == 0.0;
};
auto spmvCSRHost = [&]() {
csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
};
benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost );
////
// Perform benchmark on CUDA device with cuSparse as a reference GPU format
//
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "matrix name", convertToString( inputFileName ) },
{ "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
{ "rows", convertToString( csrHostMatrix.getRows() ) },
{ "columns", convertToString( csrHostMatrix.getColumns() ) },
{ "matrix format", String( "cuSparse" ) }
} ));
cusparseHandle_t cusparseHandle;
cusparseCreate( &cusparseHandle );
csrCudaMatrix = csrHostMatrix;
// Delete the CSRhostMatrix, so it doesn't take up unnecessary space
csrHostMatrix.reset();
TNL::CusparseCSR< Real > cusparseMatrix;
cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
auto resetCusparseVectors = [&]() {
cusparseInVector = 1.0;
cusparseOutVector == 0.0;
};
auto spmvCusparse = [&]() {
cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
};
benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Stream >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, Matrices::Legacy::BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
/* AdEllpack is broken
benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
*/
Lukas Cejka
committed
}
Lukas Cejka
committed
} // namespace Benchmarks
} // namespace TNL