Skip to content
Snippets Groups Projects
Commit 9e4d3e06 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Workaround of the nvcc bug concerning use of non-legacy matrix formats in SpMV benchmark.

parent 0994eb03
No related branches found
No related tags found
1 merge request!89To/matrices adaptive csr
/***************************************************************************
tnlCusparseCSRLegacy.h - description
-------------------
begin : Feb 1, 2021
copyright : (C) 2021 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include <TNL/Assert.h>
#include <TNL/Devices/Cuda.h>
#include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h>
#ifdef HAVE_CUDA
#include <cusparse.h>
#endif
namespace TNL {
template< typename Real >
class CusparseCSRBaseLegacy
{
public:
using RealType = Real;
using DeviceType = TNL::Devices::Cuda;
using MatrixType = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >;
CusparseCSRBaseLegacy()
: matrix( 0 )
{
};
#ifdef HAVE_CUDA
void init( const MatrixType& matrix,
cusparseHandle_t* cusparseHandle )
{
this->matrix = &matrix;
this->cusparseHandle = cusparseHandle;
cusparseCreateMatDescr( & this->matrixDescriptor );
};
#endif
int getRows() const
{
return matrix->getRows();
}
int getColumns() const
{
return matrix->getColumns();
}
int getNumberOfMatrixElements() const
{
return matrix->getAllocatedElementsCount();
}
template< typename InVector,
typename OutVector >
void vectorProduct( const InVector& inVector,
OutVector& outVector ) const
{
TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
#if CUDART_VERSION >= 11000
throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11.");
#else
cusparseDcsrmv( *( this->cusparseHandle ),
CUSPARSE_OPERATION_NON_TRANSPOSE,
this->matrix->getRows(),
this->matrix->getColumns(),
this->matrix->values.getSize(),
1.0,
this->matrixDescriptor,
this->matrix->values.getData(),
this->matrix->getRowPointers().getData(),
this->matrix->columnIndexes.getData(),
inVector.getData(),
1.0,
outVector.getData() );
#endif
#endif
}
protected:
const MatrixType* matrix;
#ifdef HAVE_CUDA
cusparseHandle_t* cusparseHandle;
cusparseMatDescr_t matrixDescriptor;
#endif
};
template< typename Real >
class CusparseCSRLegacy
{};
template<>
class CusparseCSRLegacy< double > : public CusparseCSRBaseLegacy< double >
{
public:
template< typename InVector,
typename OutVector >
void vectorProduct( const InVector& inVector,
OutVector& outVector ) const
{
TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
#if CUDART_VERSION >= 11000
throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11.");
#else
double d = 1.0;
double* alpha = &d;
cusparseDcsrmv( *( this->cusparseHandle ),
CUSPARSE_OPERATION_NON_TRANSPOSE,
this->matrix->getRows(),
this->matrix->getColumns(),
this->matrix->getValues().getSize(),
alpha,
this->matrixDescriptor,
this->matrix->getValues().getData(),
this->matrix->getRowPointers().getData(),
this->matrix->getColumnIndexes().getData(),
inVector.getData(),
alpha,
outVector.getData() );
#endif
#endif
}
};
template<>
class CusparseCSRLegacy< float > : public CusparseCSRBaseLegacy< float >
{
public:
template< typename InVector,
typename OutVector >
void vectorProduct( const InVector& inVector,
OutVector& outVector ) const
{
TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
#if CUDART_VERSION >= 11000
throw std::runtime_error("cusparseScsrmv was removed in CUDA 11.");
#else
float d = 1.0;
float* alpha = &d;
cusparseScsrmv( *( this->cusparseHandle ),
CUSPARSE_OPERATION_NON_TRANSPOSE,
this->matrix->getRows(),
this->matrix->getColumns(),
this->matrix->getValues().getSize(),
alpha,
this->matrixDescriptor,
this->matrix->getValues().getData(),
this->matrix->getRowPointers().getData(),
this->matrix->getColumnIndexes().getData(),
inVector.getData(),
alpha,
outVector.getData() );
#endif
#endif
}
};
} // namespace TNL
......@@ -38,6 +38,7 @@
using namespace TNL::Matrices;
#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h>
#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h>
namespace TNL {
namespace Benchmarks {
......@@ -240,9 +241,27 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
const Config::ParameterContainer& parameters,
bool verboseMR )
{
// The following is another workaround because of a bug in nvcc versions 10 and 11.
// If we use the current matrix formats, not the legacy ones, we get
// ' error: redefinition of ‘void TNL::Algorithms::__wrapper__device_stub_CudaReductionKernel...'
// It seems that there is a problem with lambda functions identification when we create
// two instances of TNL::Matrices::SparseMatrix. The second one comes from calling of
// `benchmarkSpMV< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );`
// and simillar later in this function. Maybe splitting this function into two might help.
#define USE_LEGACY_FORMATS
#ifdef USE_LEGACY_FORMATS
// Here we use 'int' instead of 'Index' because of compatibility with cusparse.
using CSRHostMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Host, int >;
using CSRCudaMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >;
using CusparseMatrix = TNL::CusparseCSRLegacy< Real >;
#else
// Here we use 'int' instead of 'Index' because of compatibility with cusparse.
using CSRHostMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >;
using CSRCudaMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Cuda, int >;
using CusparseMatrix = TNL::CusparseCSR< Real >;
#endif
using HostVector = Containers::Vector< Real, Devices::Host, int >;
using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
......@@ -302,7 +321,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
// Delete the CSRhostMatrix, so it doesn't take up unnecessary space
csrHostMatrix.reset();
TNL::CusparseCSR< Real > cusparseMatrix;
CusparseMatrix cusparseMatrix;
cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
......@@ -323,14 +342,14 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
/////
// Benchmarking TNL formats
benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );
/*benchmarkSpMV< Real, SparseMatrix_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_CSR_Hybrid >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMV< Real, SparseMatrix_BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );*/
benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Hybrid >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
benchmarkSpMVLegacy< Real, SparseMatrix_BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
const bool withSymmetricMatrices = parameters.getParameter< bool >("with-symmetric-matrices");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment