diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h
index d46626f6ef0c849f8f28fe6721e646b29546de3c..7d5f562c8729d2d8cc2d61943fd242756522874f 100644
--- a/tests/benchmarks/tnl-cuda-benchmarks.h
+++ b/tests/benchmarks/tnl-cuda-benchmarks.h
@@ -19,9 +19,10 @@
 #define TNLCUDBENCHMARKS_H_
 
 #include <core/tnlList.h>
-#include <matrices/tnlSlicedEllpackMatrix.h>
-#include <matrices/tnlEllpackMatrix.h>
 #include <matrices/tnlCSRMatrix.h>
+#include <matrices/tnlEllpackMatrix.h>
+#include <matrices/tnlSlicedEllpackMatrix.h>
+#include <matrices/tnlChunkedEllpackMatrix.h>
 
 #include "array-operations.h"
 #include "vector-operations.h"
@@ -244,9 +245,11 @@ int main( int argc, char* argv[] )
       {"elements per row", elementsPerRow},
    } ));
 
+   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
+   benchmarkSpMV< Real, tnlCSRMatrix >( benchmark, loops, size, elementsPerRow );
    benchmarkSpMV< Real, tnlEllpackMatrix >( benchmark, loops, size, elementsPerRow );
    benchmarkSpMV< Real, SlicedEllpackMatrix >( benchmark, loops, size, elementsPerRow );
-   benchmarkSpMV< Real, tnlCSRMatrix >( benchmark, loops, size, elementsPerRow );
+   benchmarkSpMV< Real, tnlChunkedEllpackMatrix >( benchmark, loops, size, elementsPerRow );
 
 
    if( ! benchmark.save( logFile ) )