diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 1ea7a26682e5137e64e0971b5463e6b3006267a3..32d41bb6a3767cab20631259860befa809e972c7 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -23,6 +23,10 @@
 #include <TNL/Matrices/MatrixReader.h>
 using namespace TNL::Matrices;
 
+#include <cusparse.h>
+#include "cusparseCSRMatrix.h"
+using namespace TNL;
+
 namespace TNL {
 namespace Benchmarks {
 
@@ -72,7 +76,7 @@ benchmarkSpMV( Benchmark & benchmark,
       {
          // Start a buffer to capture the output of MatrixReader
          std::stringstream buffer;
-         std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
+         std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
          
          if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
          {
@@ -86,7 +90,7 @@ benchmarkSpMV( Benchmark & benchmark,
             
             //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
             std::stringstream buffer;
-            std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
+            std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
 
             MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix );
 
@@ -106,7 +110,7 @@ benchmarkSpMV( Benchmark & benchmark,
                 stringErrorMsg.erase( stringErrorMsg.length() - 1 );
             
             // https://stackoverflow.com/questions/7352099/stdstring-to-char
-            char* errorMsg = &stringErrorMsg[0u];
+            char* errorMsg = &stringErrorMsg[ 0u ];
             
             
             // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
@@ -147,7 +151,7 @@ benchmarkSpMV( Benchmark & benchmark,
                 stringErrorMsg.erase( stringErrorMsg.length() - 1 );
          
          // https://stackoverflow.com/questions/7352099/stdstring-to-char
-         char *errorMsg = &stringErrorMsg[0u];
+         char *errorMsg = &stringErrorMsg[ 0u ];
          
          // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
          //        a prefix in the log file. 
@@ -206,10 +210,239 @@ benchmarkSpMV( Benchmark & benchmark,
 
     benchmark.setOperation( datasetSize );
     benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
+    
+    // Initialize the host vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts)
+    HostVector resultHostVector2;
+    resultHostVector2.setSize( hostVector2.getSize() );
+    resultHostVector2.setValue( 0.0 );
+    // Copy the values
+    for( int i = 0; i < hostVector2.getSize(); i++ )
+        resultHostVector2.setElement( i, hostVector2.getElement( i ) );
+    
+ #ifdef HAVE_CUDA
+    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
+ #endif
+
+    // Setup the device vector to be compared
+    HostVector resultDeviceVector2;
+    resultDeviceVector2.setSize( hostVector2.getSize() );
+    resultDeviceVector2.setValue( 0.0 );
+    
+//    resultDeviceVector2 += deviceVector2; // Throws a segfault.
+    
+    // Copy the values
+    for( int i = 0; i < deviceVector2.getSize(); i++ )
+        resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) );
+    
+    Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 );
+    Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 );
+    
+    std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
+    std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );
+    
+    char *absMax = &resultDifferenceAbsMax[ 0u ];
+    char *lpNorm = &resultDifferenceLpNorm[ 0u ];
+    
+    // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
+//    benchmark.addErrorMessage( absMax, 1 );
+//    benchmark.addErrorMessage( lpNorm, 1 );
+    
+    std::cout << std::endl;
+    return true;
+}
+
+// Compares only CSR on GPU and Cusparse on GPU.
+template< typename Real,
+          template< typename, typename, typename > class Vector = Containers::Vector >
+bool
+benchmarkCusparseSpMV( Benchmark & benchmark,
+               const String & inputFileName )
+{    
+    typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix;
+    typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix;
+    typedef Containers::Vector< Real, Devices::Host, int > HostVector;
+    typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
+    
+    CSR_HostMatrix CSRhostMatrix;
+    CSR_DeviceMatrix CSRdeviceMatrix;
+    CudaVector deviceVector, deviceVector2;
+    
+    try
+      {
+         // Start a buffer to capture the output of MatrixReader
+         std::stringstream buffer;
+         std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
+         
+         if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix ) )
+         {
+            // Capture the original output of MatrixReader, so it isn't printed by console.
+            std::string errorMsgBuffer = buffer.str();
+            // Reset the buffer
+            std::cerr.rdbuf( old );
+            
+             
+            std::string matrixFormat = getMatrixFormat( CSRhostMatrix );
+            
+            //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
+            std::stringstream buffer;
+            std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
+
+            MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix );
+
+            errorMsgBuffer = buffer.str();
+            
+            // Reset the buffer
+            std::cerr.rdbuf( old );
+            
+            std::string stringErrorMsg = "Benchmark failed: Unable to read the matrix.\n"
+                                         "matrix format: " + matrixFormat +
+                                         "\nFailed to read the matrix file " + 
+                                         ( std::string )inputFileName + ".\n" + 
+                                         errorMsgBuffer;
+            
+            //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
+            if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
+                stringErrorMsg.erase( stringErrorMsg.length() - 1 );
+            
+            // https://stackoverflow.com/questions/7352099/stdstring-to-char
+            char* errorMsg = &stringErrorMsg[ 0u ];
+            
+            
+            // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
+            //        a prefix in the log file. 
+            //        (Try adding more benchmarks in benchmarkSpmvSynthetic(...) 
+            //         and you'll see)
+            benchmark.addErrorMessage( errorMsg, 1 );
+            
+            std::cout << std::endl;
+            
+            return false;
+         }
+         std::cerr.rdbuf( old );
+      }
+      catch( std::bad_alloc )
+      {
+         std::string matrixFormat = getMatrixFormat( CSRhostMatrix );
+         
+         //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
+         std::stringstream buffer;
+         std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
+
+         MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix );
+
+         std::string errorMsgBuffer = buffer.str();
+         
+         // Reset the buffer
+         std::cerr.rdbuf( old );
+          
+         std::string stringErrorMsg = "Benchmark failed: Not enough memory.\n"
+                                      "matrix format: " + matrixFormat + 
+                                      "\nFailed to allocate memory to read the matrix file " +
+                                      ( std::string )inputFileName + ".\n" + 
+                                      errorMsgBuffer;
+         
+         //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
+         if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
+                stringErrorMsg.erase( stringErrorMsg.length() - 1 );
+         
+         // https://stackoverflow.com/questions/7352099/stdstring-to-char
+         char *errorMsg = &stringErrorMsg[ 0u ];
+         
+         // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
+         //        a prefix in the log file. 
+         //        (Try adding more benchmarks in benchmarkSpmvSynthetic(...) 
+         //         and you'll see)
+         benchmark.addErrorMessage( errorMsg, 1 );
+         
+         std::cout << std::endl;
+         
+         return false;
+      }
+    
+    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+          { "matrix format", convertToString( getMatrixFormat( CSRhostMatrix ) ) },
+          { "non-zeros", convertToString( CSRhostMatrix.getNumberOfNonzeroMatrixElements() ) },
+          { "rows", convertToString( CSRhostMatrix.getRows() ) },
+          { "columns", convertToString( CSRhostMatrix.getColumns() ) }
+       } ));
+    
+    cusparseHandle_t cusparseHandle;
+    cusparseCreate( &cusparseHandle );
+    
+#ifdef HAVE_CUDA
+    // FIXME: This doesn't work for ChunkedEllpack, because
+    //        its cross-device assignment is not implemented yet
+    CSRdeviceMatrix = CSRhostMatrix;
+    
+    TNL::CusparseCSR< Real > cusparseCSR;
+    cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle );
+#endif
+
+#ifdef HAVE_CUDA
+    deviceVector.setSize( CSRhostMatrix.getColumns() );
+    deviceVector2.setSize( CSRhostMatrix.getRows() );
+#endif
+
+    // reset function
+    auto reset = [&]() {
+ #ifdef HAVE_CUDA
+       deviceVector.setValue( 1.0 );
+       deviceVector2.setValue( 0.0 );
+ #endif
+    };
+
+    const int elements = CSRhostMatrix.getNumberOfNonzeroMatrixElements();
+
+    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+
+    // compute functions
+    auto spmvCuda = [&]() {
+       CSRdeviceMatrix.vectorProduct( deviceVector, deviceVector2 );
+    };
+    auto spmvCusparse = [&]() {
+        cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
+    };
+
+    benchmark.setOperation( datasetSize );
+    
  #ifdef HAVE_CUDA
     benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
+    
+    // Initialize the cuda vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts)
+    HostVector resultCusparseVector2;
+    resultCusparseVector2.setSize( deviceVector2.getSize() );
+    resultCusparseVector2.setValue( 0.0 );
+    // Copy the values
+    for( int i = 0; i < deviceVector2.getSize(); i++ )
+        resultCusparseVector2.setElement( i, deviceVector2.getElement( i ) );
+    
+    benchmark.time< Devices::Cuda >( reset, "GPU-Cusparse", spmvCusparse );
  #endif
+
+    // Setup the device vector to be compared
+    HostVector resultDeviceVector2;
+    resultDeviceVector2.setSize( resultCusparseVector2.getSize() );
+    resultDeviceVector2.setValue( 0.0 );
+    
+    // Copy the values
+    for( int i = 0; i < deviceVector2.getSize(); i++ )
+        resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) );
+    
+    Real differenceAbsMax = resultCusparseVector2.differenceAbsMax( resultDeviceVector2 );
+    Real differenceLpNorm = resultCusparseVector2.differenceLpNorm( resultDeviceVector2, 1 );
+    
+    std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
+    std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );
+    
+    char *absMax = &resultDifferenceAbsMax[ 0u ];
+    char *lpNorm = &resultDifferenceLpNorm[ 0u ];
+    
+    // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
+//    benchmark.addErrorMessage( absMax, 1 );
+//    benchmark.addErrorMessage( lpNorm, 1 );
+    
     std::cout << std::endl;
+    cusparseDestroy( cusparseHandle );
     return true;
 }
 
@@ -222,6 +455,10 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
    bool result = true;
    // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
    result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
+   
+   // This doesn't have a titles (matrix format, rows, cols, etc.) in the output, because the header is the same as before (CSR).
+   result |= benchmarkCusparseSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
+   
    result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
    result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
 //   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );