diff --git a/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h b/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h new file mode 100644 index 0000000000000000000000000000000000000000..133723e988b5cb21177ea440c397d182a654ef05 --- /dev/null +++ b/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h @@ -0,0 +1,171 @@ +/*************************************************************************** + tnlCusparseCSRLegacy.h - description + ------------------- + begin : Feb 1, 2021 + copyright : (C) 2021 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Assert.h> +#include <TNL/Devices/Cuda.h> +#include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h> +#ifdef HAVE_CUDA +#include <cusparse.h> +#endif + +namespace TNL { + +template< typename Real > +class CusparseCSRBaseLegacy +{ + public: + using RealType = Real; + using DeviceType = TNL::Devices::Cuda; + using MatrixType = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >; + + CusparseCSRBaseLegacy() + : matrix( 0 ) + { + }; + +#ifdef HAVE_CUDA + void init( const MatrixType& matrix, + cusparseHandle_t* cusparseHandle ) + { + this->matrix = &matrix; + this->cusparseHandle = cusparseHandle; + cusparseCreateMatDescr( & this->matrixDescriptor ); + }; +#endif + + int getRows() const + { + return matrix->getRows(); + } + + int getColumns() const + { + return matrix->getColumns(); + } + + int getNumberOfMatrixElements() const + { + return matrix->getAllocatedElementsCount(); + } + + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const + { + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); +#ifdef HAVE_CUDA +#if CUDART_VERSION >= 11000 + throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11."); +#else + cusparseDcsrmv( *( this->cusparseHandle ), + CUSPARSE_OPERATION_NON_TRANSPOSE, + this->matrix->getRows(), + this->matrix->getColumns(), + this->matrix->values.getSize(), + 1.0, + this->matrixDescriptor, + this->matrix->values.getData(), + this->matrix->getRowPointers().getData(), + this->matrix->columnIndexes.getData(), + inVector.getData(), + 1.0, + outVector.getData() ); +#endif +#endif + } + + protected: + + const MatrixType* matrix; +#ifdef HAVE_CUDA + cusparseHandle_t* cusparseHandle; + + cusparseMatDescr_t matrixDescriptor; +#endif +}; + + +template< typename Real > +class CusparseCSRLegacy +{}; + +template<> +class CusparseCSRLegacy< double > : public CusparseCSRBaseLegacy< double > +{ + public: + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const + { + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); +#ifdef HAVE_CUDA +#if CUDART_VERSION >= 11000 + throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11."); +#else + double d = 1.0; + double* alpha = &d; + cusparseDcsrmv( *( this->cusparseHandle ), + CUSPARSE_OPERATION_NON_TRANSPOSE, + this->matrix->getRows(), + this->matrix->getColumns(), + this->matrix->getValues().getSize(), + alpha, + this->matrixDescriptor, + this->matrix->getValues().getData(), + this->matrix->getRowPointers().getData(), + this->matrix->getColumnIndexes().getData(), + inVector.getData(), + alpha, + outVector.getData() ); +#endif +#endif + } +}; + +template<> +class CusparseCSRLegacy< float > : public CusparseCSRBaseLegacy< float > +{ + public: + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const + { + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); +#ifdef HAVE_CUDA +#if CUDART_VERSION >= 11000 + throw std::runtime_error("cusparseScsrmv was removed in CUDA 11."); +#else + float d = 1.0; + float* alpha = &d; + cusparseScsrmv( *( this->cusparseHandle ), + CUSPARSE_OPERATION_NON_TRANSPOSE, + this->matrix->getRows(), + this->matrix->getColumns(), + this->matrix->getValues().getSize(), + alpha, + this->matrixDescriptor, + this->matrix->getValues().getData(), + this->matrix->getRowPointers().getData(), + this->matrix->getColumnIndexes().getData(), + inVector.getData(), + alpha, + outVector.getData() ); +#endif +#endif + } +}; + +} // namespace TNL diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h index 690bbed7e810b31f14d2d79441f066f05d412225..7c7e19d8074e9ec6c796e87e8e136af07f9dd44c 100644 --- a/src/Benchmarks/SpMV/spmv-legacy.h +++ b/src/Benchmarks/SpMV/spmv-legacy.h @@ -38,6 +38,7 @@ using namespace TNL::Matrices; #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h> +#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h> namespace TNL { namespace Benchmarks { @@ -240,9 +241,27 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, const Config::ParameterContainer& parameters, bool verboseMR ) { + // The following is another workaround because of a bug in nvcc versions 10 and 11. + // If we use the current matrix formats, not the legacy ones, we get + // ' error: redefinition of â€void TNL::Algorithms::__wrapper__device_stub_CudaReductionKernel...' + // It seems that there is a problem with lambda functions identification when we create + // two instances of TNL::Matrices::SparseMatrix. The second one comes from calling of + // `benchmarkSpMV< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );` + // and simillar later in this function. Maybe splitting this function into two might help. +#define USE_LEGACY_FORMATS +#ifdef USE_LEGACY_FORMATS + // Here we use 'int' instead of 'Index' because of compatibility with cusparse. + using CSRHostMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Host, int >; + using CSRCudaMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >; + using CusparseMatrix = TNL::CusparseCSRLegacy< Real >; +#else // Here we use 'int' instead of 'Index' because of compatibility with cusparse. using CSRHostMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >; using CSRCudaMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Cuda, int >; + using CusparseMatrix = TNL::CusparseCSR< Real >; +#endif + + using HostVector = Containers::Vector< Real, Devices::Host, int >; using CudaVector = Containers::Vector< Real, Devices::Cuda, int >; @@ -302,7 +321,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, // Delete the CSRhostMatrix, so it doesn't take up unnecessary space csrHostMatrix.reset(); - TNL::CusparseCSR< Real > cusparseMatrix; + CusparseMatrix cusparseMatrix; cusparseMatrix.init( csrCudaMatrix, &cusparseHandle ); CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() ); @@ -323,14 +342,14 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, ///// // Benchmarking TNL formats benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR ); - /*benchmarkSpMV< Real, SparseMatrix_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_CSR_Hybrid >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );*/ + benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Hybrid >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMVLegacy< Real, SparseMatrix_BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); const bool withSymmetricMatrices = parameters.getParameter< bool >("with-symmetric-matrices");