diff --git a/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h b/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h
new file mode 100644
index 0000000000000000000000000000000000000000..133723e988b5cb21177ea440c397d182a654ef05
--- /dev/null
+++ b/src/Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h
@@ -0,0 +1,171 @@
+/***************************************************************************
+                          cusparseCSRMatrixLegacy.h  -  description
+                             -------------------
+    begin                : Feb 1, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <stdexcept>
+#include <TNL/Assert.h>
+#include <TNL/Devices/Cuda.h>
+#include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h>
+#ifdef HAVE_CUDA
+#include <cusparse.h>
+#endif
+
+namespace TNL {
+
+/// Common part of the legacy cuSPARSE CSR wrappers. It keeps non-owning pointers
+/// to a legacy CSR matrix on the CUDA device and to a cuSPARSE handle, together
+/// with the cuSPARSE matrix descriptor created by init().
+template< typename Real >
+class CusparseCSRBaseLegacy
+{
+   public:
+      using RealType = Real;
+      using DeviceType = TNL::Devices::Cuda;
+      using MatrixType = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >;
+
+      CusparseCSRBaseLegacy() = default;
+
+#ifdef HAVE_CUDA
+      /// Binds the wrapper to \e matrix and \e cusparseHandle. Only their addresses
+      /// are stored, so both must outlive this object. The descriptor is created on
+      /// the first call and reused later; std::runtime_error is thrown on failure.
+      void init( const MatrixType& matrix,
+                 cusparseHandle_t* cusparseHandle )
+      {
+         this->matrix = &matrix;
+         this->cusparseHandle = cusparseHandle;
+         if( ! this->matrixDescriptor &&
+             cusparseCreateMatDescr( & this->matrixDescriptor ) != CUSPARSE_STATUS_SUCCESS )
+            throw std::runtime_error( "cusparseCreateMatDescr failed." );
+      }
+#endif
+
+      /// Number of rows of the bound matrix; init() must have been called before.
+      int getRows() const { return matrix->getRows(); }
+
+      /// Number of columns of the bound matrix; init() must have been called before.
+      int getColumns() const { return matrix->getColumns(); }
+
+      /// Value of getAllocatedElementsCount() of the bound matrix.
+      int getNumberOfMatrixElements() const { return matrix->getAllocatedElementsCount(); }
+
+      /// Generic fallback calling cusparseDcsrmv with alpha = beta = 1. It is hidden
+      /// by vectorProduct() of the CusparseCSRLegacy specializations; the cuSPARSE
+      /// branch is well-formed only for Real = double. Throws with CUDA 11 and newer.
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const
+      {
+         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
+#ifdef HAVE_CUDA
+#if CUDART_VERSION >= 11000
+         throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11.");
+#else
+         const Real one = 1.0;  // cuSPARSE takes alpha and beta by pointer
+         cusparseDcsrmv( *( this->cusparseHandle ),
+                         CUSPARSE_OPERATION_NON_TRANSPOSE,
+                         this->matrix->getRows(),
+                         this->matrix->getColumns(),
+                         this->matrix->getValues().getSize(),
+                         &one,
+                         this->matrixDescriptor,
+                         this->matrix->getValues().getData(),
+                         this->matrix->getRowPointers().getData(),
+                         this->matrix->getColumnIndexes().getData(),
+                         inVector.getData(),
+                         &one,
+                         outVector.getData() );
+#endif
+#endif
+      }
+
+   protected:
+      const MatrixType* matrix = nullptr;
+#ifdef HAVE_CUDA
+      cusparseHandle_t* cusparseHandle = nullptr;
+      cusparseMatDescr_t matrixDescriptor = nullptr;
+#endif
+};
+
+/// Primary template is empty; the double and float specializations below provide vectorProduct().
+template< typename Real >
+class CusparseCSRLegacy
+{};
+
+template<>
+class CusparseCSRLegacy< double > : public CusparseCSRBaseLegacy< double >
+{
+   public:
+      /// Double-precision SpMV via the legacy cusparseDcsrmv with alpha = beta = 1.
+      /// Throws std::runtime_error when compiled with CUDA 11 or newer.
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const
+      {
+         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
+#ifdef HAVE_CUDA
+#if CUDART_VERSION >= 11000
+         throw std::runtime_error("cusparseDcsrmv was removed in CUDA 11.");
+#else
+         const double one = 1.0;               // passed as both alpha and beta
+         const auto& csr = *( this->matrix );  // matrix bound by init()
+         cusparseDcsrmv( *( this->cusparseHandle ),
+                         CUSPARSE_OPERATION_NON_TRANSPOSE,
+                         csr.getRows(),
+                         csr.getColumns(),
+                         csr.getValues().getSize(),
+                         &one,
+                         this->matrixDescriptor,
+                         csr.getValues().getData(),
+                         csr.getRowPointers().getData(),
+                         csr.getColumnIndexes().getData(),
+                         inVector.getData(),
+                         &one,
+                         outVector.getData() );
+#endif
+#endif
+      }
+};
+
+template<>
+class CusparseCSRLegacy< float > : public CusparseCSRBaseLegacy< float >
+{
+   public:
+      /// Single-precision SpMV via the legacy cusparseScsrmv with alpha = beta = 1.
+      /// Throws std::runtime_error when compiled with CUDA 11 or newer.
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const
+      {
+         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
+#ifdef HAVE_CUDA
+#if CUDART_VERSION >= 11000
+         throw std::runtime_error("cusparseScsrmv was removed in CUDA 11.");
+#else
+         const float one = 1.0f;               // passed as both alpha and beta
+         const auto& csr = *( this->matrix );  // matrix bound by init()
+         cusparseScsrmv( *( this->cusparseHandle ),
+                         CUSPARSE_OPERATION_NON_TRANSPOSE,
+                         csr.getRows(),
+                         csr.getColumns(),
+                         csr.getValues().getSize(),
+                         &one,
+                         this->matrixDescriptor,
+                         csr.getValues().getData(),
+                         csr.getRowPointers().getData(),
+                         csr.getColumnIndexes().getData(),
+                         inVector.getData(),
+                         &one,
+                         outVector.getData() );
+#endif
+#endif
+      }
+};
+
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h
index 690bbed7e810b31f14d2d79441f066f05d412225..7c7e19d8074e9ec6c796e87e8e136af07f9dd44c 100644
--- a/src/Benchmarks/SpMV/spmv-legacy.h
+++ b/src/Benchmarks/SpMV/spmv-legacy.h
@@ -38,6 +38,7 @@
 using namespace TNL::Matrices;
 
 #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h>
+#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h>
 
 namespace TNL {
    namespace Benchmarks {
@@ -240,9 +241,27 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
                         const Config::ParameterContainer& parameters,
                         bool verboseMR )
 {
+   // The following is another workaround for a bug in nvcc versions 10 and 11.
+   // If we use the current matrix formats instead of the legacy ones, we get
+   // ' error: redefinition of ‘void TNL::Algorithms::__wrapper__device_stub_CudaReductionKernel...'
+   // It seems that nvcc has a problem identifying lambda functions when we create
+   // two instances of TNL::Matrices::SparseMatrix. The second one comes from the call
+   // `benchmarkSpMV< Real, SparseMatrix_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );`
+   // and similar calls later in this function. Maybe splitting this function into two might help.
+#define USE_LEGACY_FORMATS
+#ifdef USE_LEGACY_FORMATS
+   // Here we use 'int' instead of 'Index' because of compatibility with cusparse.
+   using CSRHostMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Host, int >;
+   using CSRCudaMatrix = SpMV::ReferenceFormats::Legacy::CSR< Real, Devices::Cuda, int >;
+   using CusparseMatrix = TNL::CusparseCSRLegacy< Real >;
+#else
    // Here we use 'int' instead of 'Index' because of compatibility with cusparse.
    using CSRHostMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >;
    using CSRCudaMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Cuda, int >;
+   using CusparseMatrix = TNL::CusparseCSR< Real >;
+#endif
+
+
    using HostVector = Containers::Vector< Real, Devices::Host, int >;
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
@@ -302,7 +321,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
    csrHostMatrix.reset();
 
-   TNL::CusparseCSR< Real > cusparseMatrix;
+   CusparseMatrix cusparseMatrix;
    cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
 
    CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
@@ -323,14 +342,14 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    /////
    // Benchmarking TNL formats
    benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Scalar                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   /*benchmarkSpMV< Real, SparseMatrix_CSR_Vector                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_CSR_Hybrid                   >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_CSR_Adaptive                 >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_Ellpack                      >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack                >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_ChunkedEllpack               >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_BiEllpack                    >( benchmark, hostOutVector, inputFileName, verboseMR );*/
+   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Vector                   >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Hybrid                   >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_CSR_Adaptive                 >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_Ellpack                      >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_SlicedEllpack                >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_ChunkedEllpack               >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrix_BiEllpack                    >( benchmark, hostOutVector, inputFileName, verboseMR );
 
 
    const bool withSymmetricMatrices = parameters.getParameter< bool >("with-symmetric-matrices");