diff --git a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
index 221fc627468994638bdc34c22bac19e64ae45fe5..7d6ffde49960a1e53b563222af8ab4861accba0f 100644
--- a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
+++ b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
@@ -126,6 +126,11 @@ struct LightSpMVBenchmark
 #endif
    }
 
+   void setKernelType( LightSpMVBenchmarkKernelType type )  // Setter for the kernel variant of this benchmark object.
+   {
+      this->kernelType = type;  // Only stores the value; presumably consulted by vectorProduct() - TODO confirm.
+   }
+
    void resetVectors()
    {
       this->inVectorView = 1.0;
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index d07a0f6bc6b3933a764143a421561e9c2b4f9eb7..991c6b56ca434093fa283296eba04272b98deaf6 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -39,9 +39,9 @@
 #include <TNL/Algorithms/Segments/BiEllpack.h>
 
 // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation
-//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
-//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
-//#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
+#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
+#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
+#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
 
 // Uncomment the following line to enable benchmarking the sandbox sparse matrix.
 //#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
@@ -374,10 +374,10 @@ benchmarkSpMV( Benchmark& benchmark,
 template< typename Real = double,
           typename Index = int >
 void
-benchmarkSpmvSynthetic( Benchmark& benchmark,
-                        const String& inputFileName,
-                        const Config::ParameterContainer& parameters,
-                        bool verboseMR )
+benchmarkSpmv( Benchmark& benchmark,
+               const String& inputFileName,
+               const Config::ParameterContainer& parameters,
+               bool verboseMR )
 {
    // The following is another workaround because of a bug in nvcc versions 10 and 11.
    // If we use the current matrix formats, not the legacy ones, we get
@@ -469,8 +469,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
        cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
    };
 
-   SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
+   SpmvBenchmarkResult< Real, Devices::Host, int > cudaBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
+   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );
 
 #ifdef HAVE_CSR5
    ////
@@ -489,7 +489,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    auto csr5SpMV = [&]() {
        csr5Benchmark.vectorProduct();
    };
-   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults );
+   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
    std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
    csrCudaMatrix.reset();
 #endif
@@ -501,7 +501,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
       { "matrix name", convertToString( inputFileName ) },
       { "rows", convertToString( csrHostMatrix.getRows() ) },
       { "columns", convertToString( csrHostMatrix.getColumns() ) },
-      { "matrix format", String( "LightSpMV" ) }
+      { "matrix format", String( "LightSpMV Vector" ) }
    } ));
 
    LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
@@ -514,7 +514,16 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    auto spmvLightSpMV = [&]() {
        lightSpMVBenchmark.vectorProduct();
    };
-   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cusparseBenchmarkResults );
+   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
+
+   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+      { "matrix name", convertToString( inputFileName ) },
+      { "rows", convertToString( csrHostMatrix.getRows() ) },
+      { "columns", convertToString( csrHostMatrix.getColumns() ) },
+      { "matrix format", String( "LightSpMV Warp" ) }
+   } ));
+   lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
+   benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
 #endif
    csrHostMatrix.reset();
 
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 9a5005de73d06fb3d99709f89a32d8036722cac3..026ed356d67a3e997b032b303d3c7dbdb1e329dc 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -45,7 +45,7 @@ runSpMVBenchmarks( Benchmark & benchmark,
                            metadata );
    // Start the actual benchmark in spmv.h
    try {
-      SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, parameters, verboseMR );
+      SpMVLegacy::benchmarkSpmv< Real >( benchmark, inputFileName, parameters, verboseMR );
    }
    catch( const std::exception& ex ) {
       std::cerr << ex.what() << std::endl;