diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 4caf0fbda397f92d8cb7c143a12896b89600beb0..cbd628b03e13f9c792e2b9dd90b5573ea3ea4568 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -51,7 +51,13 @@ struct BenchmarkResult
    virtual RowElements getRowElements() const
-      return RowElements({ time, stddev, stddev / time, bandwidth, speedup });
+      // Row layout: time, stddev, stddev / time, bandwidth, speedup.
+      RowElements elements;
+      elements << time << stddev << stddev / time << bandwidth;
+      // A speedup equal to zero is logged as the text "N/A" instead of a number.
+      if( speedup != 0 )
+         elements << speedup;
+      else 
+         elements << "N/A";
+      return elements;
diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h
index 7b22cdfc1ef30aaddc29cc35067962ed4530f257..a4c04578d5553f2f039a8e7fc575de0ad116c48d 100644
--- a/src/Benchmarks/LinearSolvers/benchmarks.h
+++ b/src/Benchmarks/LinearSolvers/benchmarks.h
@@ -160,8 +160,14 @@ benchmarkSolver( Benchmark& benchmark,
          r = b - r;
          const double residue_true = lpNorm( r, 2.0 ) / lpNorm( b, 2.0 );
-         return RowElements({ time, stddev, stddev/time, speedup, (double) converged, (double) iterations,
-                              residue_precond, residue_true });
+         // Row layout: time, stddev, stddev/time, speedup, converged, iterations,
+         // residue_precond, residue_true.
+         RowElements elements;
+         elements << time << stddev << stddev/time;
+         // A speedup equal to zero is logged as the text "N/A" instead of a number.
+         if( speedup != 0  )
+            elements << speedup;
+         else
+            elements <<  "N/A";
+         // Convergence is logged as the text "yes"/"no" rather than as a number.
+         elements << ( converged ? "yes" : "no" ) << iterations << residue_precond << residue_true;
+         return elements;
    MyBenchmarkResult benchmarkResult( solver, matrix, x, b );
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 61608d364e769fe4b6e68a0691957745ee805496..fb4426bb13daa9f59e2518c1ed11a971ccd525ab 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -25,6 +25,55 @@
 namespace TNL {
 namespace Benchmarks {
+class LoggingRowElements
+   public:
+      // Sets the default formatting of the internal stream: fixed notation
+      // with 6 digits of precision.
+      LoggingRowElements()
+      {
+         stream << std::setprecision( 6 ) << std::fixed;
+      }
+      // Appends one row element: the value is formatted through the internal
+      // stream, the resulting text is stored, and the stream buffer is emptied
+      // so that the next value starts from an empty string.
+      template< typename T >
+      LoggingRowElements& operator << ( const T& b )
+      {
+         stream << b;
+         elements.push_back( stream.str() );
+         stream.str( std::string() );
+         return *this;
+      }
+      // Applies a precision manipulator to the internal stream without adding
+      // an element. The manipulator is taken by const reference so that a
+      // temporary such as `std::setprecision( 3 )` selects this overload; a
+      // non-const reference cannot bind a temporary, so the call would resolve
+      // to the generic operator and store an empty element.
+      LoggingRowElements& operator << ( const decltype( std::setprecision( 2 ) )& setprec )
+      {
+         stream << setprec;
+         return *this;
+      }
+      // Applies a float-format manipulator to the internal stream without
+      // adding an element.
+      LoggingRowElements& operator << ( decltype( std::fixed )& setfixed ) // the same works also for std::scientific
+      {
+         stream << setfixed;
+         return *this;
+      }
+      // iterators over the stored element strings
+      auto begin() noexcept { return elements.begin(); }
+      auto begin() const noexcept { return elements.begin(); }
+      auto cbegin() const noexcept { return elements.cbegin(); }
+      auto end() noexcept { return elements.end(); }
+      auto end() const noexcept { return elements.end(); }
+      auto cend() const noexcept { return elements.cend(); }
+   protected:
+      // formatted text of every element, in insertion order
+      std::list< String > elements;
+      // stream used to format each value before it is stored in `elements`
+      std::stringstream stream;
 class Logging
@@ -33,7 +82,7 @@ public:
    using MetadataColumns = std::vector<MetadataElement>;
    using HeaderElements = std::vector< String >;
-   using RowElements = std::vector< double >;
+   using RowElements = LoggingRowElements;
    Logging( int verbose = true )
    : verbose(verbose)
@@ -131,9 +180,7 @@ public:
          // spanning element is printed as usual column to stdout
          std::cout << std::setw( 15 ) << spanningElement;
          for( auto & it : subElements ) {
-            std::cout << std::setw( 15 );
-            if( it != 0.0 )std::cout << it;
-            else std::cout << "N/A";
+            std::cout << std::setw( 15 ) << it;
          std::cout << std::endl;
@@ -147,8 +194,7 @@ public:
       // benchmark data are indented
       const String indent = "    ";
       for( auto & it : subElements ) {
-         if( it != 0.0 ) log << indent << it << std::endl;
-         else log << indent << "N/A" << std::endl;
+         log << indent << it << std::endl;
diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 699be6efd1be5a06f978dfdc8da6d8dbed71add2..02ef6b61220fa404a704f36443f936c64b51174f 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -15,37 +15,42 @@
 namespace TNL {
 namespace Benchmarks {
-template< typename Real = double,
-          typename Index = int >
+template< typename Real,
+          typename Device,
+          typename Index >
 struct SpmvBenchmarkResult
 : public BenchmarkResult
    using RealType = Real;
+   using DeviceType = Device;
    using IndexType = Index;
    using HostVector = Containers::Vector< Real, Devices::Host, Index >;
-   using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >;
+   using BenchmarkVector = Containers::Vector< Real, Device, Index >;
-   SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult )
-   : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){};
+   SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult )
+   : csrResult( csrResult ), benchmarkResult( benchmarkResult ){};
    virtual HeaderElements getTableHeader() const override
-      return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"});
+      return HeaderElements( {"time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} );
    virtual RowElements getRowElements() const override
-      HostVector cudaCopy, cusparseCopy, a, b;
-      cudaCopy = cudaResult;
-      cusparseCopy = cusparseResult;
-      a = cudaCopy - hostResult;
-      b = cudaCopy - cusparseCopy;
-      return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) });
+      // Copy the benchmarked result into a host vector so that it can be
+      // subtracted from the host-side CSR result.
+      HostVector benchmarkResultCopy;
+      benchmarkResultCopy = benchmarkResult;
+      auto diff = csrResult - benchmarkResultCopy;
+      RowElements elements;
+      elements << time << stddev << stddev/time << bandwidth;
+      // A speedup equal to zero is logged as the text "N/A" instead of a number.
+      if( speedup != 0.0 )
+         elements << speedup;
+      else elements << "N/A";
+      // "CSR Diff.Max" and "CSR Diff.L2": max of abs( diff ) and lpNorm( diff, 2.0 ).
+      elements << max( abs( diff ) ) << lpNorm( diff, 2.0 );
+      return elements;
-   HostVector &hostResult;
-   CudaVector &cudaResult, &cusparseResult;
+   const HostVector& csrResult;
+   const BenchmarkVector& benchmarkResult;
 } //namespace Benchmarks
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv-legacy.h
similarity index 57%
rename from src/Benchmarks/SpMV/spmv.h
rename to src/Benchmarks/SpMV/spmv-legacy.h
index 8851e4114362024953255ce7a7b1b82322b2bab6..b7bdba6ca973e898d08cac7fda991b98aba4eb59 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv-legacy.h
@@ -38,7 +38,8 @@ using namespace TNL::Matrices;
 #include "cusparseCSRMatrix.h"
 namespace TNL {
-namespace Benchmarks {
+   namespace Benchmarks {
+      namespace SpMVLegacy {
 // Alias to match the number of template parameters with other formats
 template< typename Real, typename Device, typename Index >
@@ -111,142 +112,66 @@ template< typename Real,
           template< typename, typename, typename, typename > class Vector = Containers::Vector >
 benchmarkSpMV( Benchmark& benchmark,
+               const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
                const String& inputFileName,
                bool verboseMR )
-   // Setup CSR for cuSPARSE. It will compared to the format given as a template parameter to this function
-   using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
-   using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;
-   CSR_HostMatrix CSRhostMatrix;
-   CSR_DeviceMatrix CSRdeviceMatrix;
-   // Read the matrix for CSR, to set up cuSPARSE
-   MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR );
-#ifdef HAVE_CUDA
-   // cuSPARSE handle setup
-   cusparseHandle_t cusparseHandle;
-   cusparseCreate( &cusparseHandle );
-   // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
-   CSRdeviceMatrix = CSRhostMatrix;
-   // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
-   CSRhostMatrix.reset();
-   // Initialize the cusparseCSR matrix.
-   TNL::CusparseCSR< Real > cusparseCSR;
-   cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle );
-   // Setup the format which is given as a template parameter to this function
-   typedef Matrix< Real, Devices::Host, int > HostMatrix;
-   typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
-   typedef Containers::Vector< Real, Devices::Host, int > HostVector;
-   typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
+   using HostMatrix = Matrix< Real, Devices::Host, int >;
+   using CudaMatrix = Matrix< Real, Devices::Cuda, int >;
+   using HostVector = Containers::Vector< Real, Devices::Host, int >;
+   using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
    HostMatrix hostMatrix;
-   DeviceMatrix deviceMatrix;
-   HostVector hostVector, hostVector2;
-   CudaVector deviceVector, deviceVector2, cusparseVector;
+   CudaMatrix cudaMatrix;
-   // Load the format
    MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
-   // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
-   //  because we need the matrix loaded first to get the rows and columns
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
          { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
          { "rows", convertToString( hostMatrix.getRows() ) },
          { "columns", convertToString( hostMatrix.getColumns() ) },
-         { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) }
+         { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
       } ));
+   const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
+   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   benchmark.setOperation( datasetSize );
-   hostVector.setSize( hostMatrix.getColumns() );
-   hostVector2.setSize( hostMatrix.getRows() );
+   /***
+    * Benchmark SpMV on host
+    */
+   HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
-#ifdef HAVE_CUDA
-   deviceMatrix = hostMatrix;
-   deviceVector.setSize( hostMatrix.getColumns() );
-   deviceVector2.setSize( hostMatrix.getRows() );
-   cusparseVector.setSize( hostMatrix.getRows() );
-   // reset function
    auto resetHostVectors = [&]() {
-      hostVector = 1.0;
-      hostVector2 = 0.0;
-   };
-#ifdef HAVE_CUDA
-   auto resetCudaVectors = [&]() {
-      deviceVector = 1.0;
-      deviceVector2 = 0.0;
+      hostInVector = 1.0;
+      hostOutVector = 0.0;
-   auto resetCusparseVectors = [&]() {
-      deviceVector = 1.0;
-      cusparseVector == 0.0;
-   };
- #endif
-   const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-    // compute functions
    auto spmvHost = [&]() {
-      hostMatrix.vectorProduct( hostVector, hostVector2 );
-   };
-#ifdef HAVE_CUDA
-   auto spmvCuda = [&]() {
-      deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
-   };
+      hostMatrix.vectorProduct( hostInVector, hostOutVector );
-   auto spmvCusparse = [&]() {
-       cusparseCSR.vectorProduct( deviceVector, cusparseVector );
-   benchmark.setOperation( datasetSize );
-   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost );
-   // Initialize the host vector to be compared.
-   //  (The values in hostVector2 will be reset when spmvCuda starts)
-   HostVector resultHostVector2;
-   resultHostVector2.setSize( hostVector2.getSize() );
-   resultHostVector2.setValue( 0.0 );
-   // Copy the values
-   resultHostVector2 = hostVector2;
+   SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
+   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
+   /***
+    * Benchmark SpMV on CUDA
+    */
 #ifdef HAVE_CUDA
-   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda );
-   // Initialize the device vector to be compared.
-   //  (The values in deviceVector2 will be reset when spmvCusparse starts)
-   HostVector resultDeviceVector2;
-   resultDeviceVector2.setSize( deviceVector2.getSize() );
-   resultDeviceVector2.setValue( 0.0 );
-   resultDeviceVector2 = deviceVector2;
-   // Setup cuSPARSE MetaData, since it has the same header as CSR,
-   //  and therefore will not get its own headers (rows, cols, speedup etc.) in log.
-   //      * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
-   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
-         { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
-         { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
-         { "rows", convertToString( hostMatrix.getRows() ) },
-         { "columns", convertToString( hostMatrix.getColumns() ) },
-         { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) }
-      } ));
+   cudaMatrix = hostMatrix;
+   CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
-   SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector );
-   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult );
+   auto resetCudaVectors = [&]() {
+      cudaInVector = 1.0;
+      cudaOutVector = 0.0;
+   };
+   auto spmvCuda = [&]() {
+      cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
+   };
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
+   benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
     std::cout << std::endl;
@@ -257,27 +182,96 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
                         const String& inputFileName,
                         bool verboseMR )
-   benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::BiEllpack >( benchmark, inputFileName, verboseMR );
+   using CSRHostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
+   using CSRCudaMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;
+   using HostVector = Containers::Vector< Real, Devices::Host, int >;
+   using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
+   CSRHostMatrix csrHostMatrix;
+   CSRCudaMatrix csrCudaMatrix;
+   ////
+   // Set-up benchmark datasize
+   //
+   MatrixReader< CSRHostMatrix >::readMtxFile( inputFileName, csrHostMatrix, verboseMR );
+   const int elements = csrHostMatrix.getNumberOfNonzeroMatrixElements();
+   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   benchmark.setOperation( datasetSize );
+   ////
+   // Perform benchmark on host with CSR as a reference CPU format
+   //
+   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+         { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
+         { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
+         { "rows", convertToString( csrHostMatrix.getRows() ) },
+         { "columns", convertToString( csrHostMatrix.getColumns() ) },
+         { "matrix format", String( "CSR" ) }
+      } ));
+   // The input vector has one entry per matrix column and the output vector one
+   // entry per matrix row; sizing both by rows is wrong for non-square matrices.
+   HostVector hostInVector( csrHostMatrix.getColumns() ), hostOutVector( csrHostMatrix.getRows() );
+   auto resetHostVectors = [&]() {
+      hostInVector = 1.0;
+      // Assignment, not `==`: a comparison would leave the previous result in place.
+      hostOutVector = 0.0;
+   };
+   auto spmvCSRHost = [&]() {
+       csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
+   };
+   // This run is labelled "CPU" and works on host data, so it is timed as a host benchmark.
+   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvCSRHost );
-   // Segments based sparse matrices
+   // Perform benchmark on CUDA device with cuSparse as a reference GPU format
+#ifdef HAVE_CUDA
+   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+         { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
+         { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
+         { "rows", convertToString( csrHostMatrix.getRows() ) },
+         { "columns", convertToString( csrHostMatrix.getColumns() ) },
+         { "matrix format", String( "cuSparse" ) }
+      } ));
+   cusparseHandle_t cusparseHandle;
+   cusparseCreate( &cusparseHandle );
+   csrCudaMatrix = csrHostMatrix;
+   // Release csrHostMatrix, so it doesn't take up unnecessary space
+   csrHostMatrix.reset();
+   TNL::CusparseCSR< Real > cusparseMatrix;
+   cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
+   CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
+   // Restores the input/output vectors before every timed cuSPARSE run.
+   auto resetCusparseVectors = [&]() {
+      cusparseInVector = 1.0;
+      // Assignment, not `==`: a comparison would leave the previous result in place.
+      cusparseOutVector = 0.0;
+   };
+   auto spmvCusparse = [&]() {
+       cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
+   };
+   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
-   // AdEllpack is broken
-   // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-   //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::CSR            >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR                 >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::Ellpack        >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_Ellpack             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SlicedEllpackAlias               >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack       >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::BiEllpack      >( benchmark, hostOutVector, inputFileName, verboseMR );
+   /* AdEllpack is broken
+   benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
+    */
+} // namespace SpMVLegacy
 } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 4c6aea68ed065d331fb0f34663c128f132ecb6e3..d8e2003fb5f9e3932d0964696ebf828b429f8f01 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -20,7 +20,7 @@
 #include <Benchmarks/BLAS/array-operations.h>
 #include <Benchmarks/BLAS/vector-operations.h>
-#include "spmv.h"
+#include "spmv-legacy.h"
 #include <TNL/Matrices/MatrixReader.h>
 using namespace TNL::Matrices;
@@ -37,14 +37,14 @@ runSpMVBenchmarks( Benchmark & benchmark,
                    const String & inputFileName,
                    bool verboseMR = false )
-    const String precision = getType< Real >();
-    metadata["precision"] = precision;
-    // Sparse matrix-vector multiplication
-    benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
-                            metadata );
-    // Start the actual benchmark in spmv.h
-    benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, verboseMR );
+   const String precision = getType< Real >();
+   metadata["precision"] = precision;
+   // Sparse matrix-vector multiplication
+   benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
+                           metadata );
+   // Start the actual benchmark in spmv-legacy.h
+   SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, verboseMR );
 // Get current date time to have different log files names and avoid overwriting.
diff --git a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py
new file mode 100755
index 0000000000000000000000000000000000000000..5b44acc1f1faf0a55dca9f7adee14b3f2ac9ed99
--- /dev/null
+++ b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py
@@ -0,0 +1,217 @@
+#! /usr/bin/env python3
+import os
+import re
+import math
+import pandas
+from TNL.LogParser import LogParser
+#pandas.options.display.float_format = "{:.2f}".format
+pandas.options.display.float_format = "{:.2e}".format
+pandas.options.display.width = 0    # auto-detect terminal width for formatting
+pandas.options.display.max_rows = None
+def parse_file(fname):
+    """Yield the data frame of every record that LogParser reads from `fname`.
+
+    The metadata delivered alongside each data frame is discarded.
+    """
+    for _metadata, frame in LogParser().readFile(fname):
+        yield frame
+def calculate_efficiency(df, nodes_col_index, base_column=None):
+    """Return `df` with one additional "eff" row appended for every row of `df`.
+
+    For a row `i` and a column `j` the appended value is
+    ``base_column[i] / df.loc[i][j] / n`` where `n` is the column label itself
+    when it is an int, and ``j[nodes_col_index]`` otherwise.
+
+    :param df: data frame whose index entries are tuples; index labels equal
+        to "time" are renamed to "eff" in the appended rows.
+    :param nodes_col_index: position of `n` inside a non-int column label.
+    :param base_column: series indexed like `df`; defaults to the first column.
+    :return: a new data frame, the rows of `df` followed by the "eff" rows.
+    """
+    if base_column is None:
+        base_column = df[df.columns[0]]
+    eff_rows = []
+    for i in df.index:
+        row = df.loc[i]
+        eff_row = row.copy()
+        base = base_column[i]
+        for j in row.index:
+            n = j if isinstance(j, int) else j[nodes_col_index]
+            eff_row[j] = base / row[j] / n
+        eff_rows.append(eff_row)
+    eff_df = pandas.DataFrame(eff_rows)
+    eff_df.index = pandas.MultiIndex.from_tuples(eff_df.index)
+    eff_df = eff_df.rename(index={"time": "eff"})
+    # DataFrame.append was removed in pandas 2.0; concat gives the same result.
+    return pandas.concat([df, eff_df])
+log_files = ["sparse-matrix-benchmark.log"]
+print( "Parsing log file..." )
+# Gather the data frames of all log files and stack them into a single table.
+dfs = [frame for log_file in log_files for frame in parse_file(log_file)]
+df = pandas.concat(dfs)
+## Post-processing
+print( "Postprocessing data frame..." )
+# Drop norms of results differences
+#df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True )
+# show matrix formats as columns
+df = df.unstack()
+df = df.reorder_levels([2, 0, 1], axis=1)
+df.sort_index(axis=1, inplace=True)
+# Remove the CPU "speedup" column of every matrix format, one format at a time.
+for matrix_format in ( 'BiEllpack Legacy', 'CSR', 'CSR Legacy', 'ChunkedEllpack Legacy',
+                       'Ellpack', 'Ellpack Legacy', 'SlicedEllpack', 'SlicedEllpack Legacy' ):
+    df.drop(columns=(matrix_format, 'CPU','speedup'), axis=1, inplace=True )
+# For cuSparse every CPU column is removed, not only the speedup.
+df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True )
+print( "Computing speed-up of formats...")
+# Add speedup compared to CSR and cuSparse
+# Each new column is <format time> / <reference time> for the same device:
+# the CSR format is the CPU reference and cuSparse is the GPU reference.
+# NOTE(review): with this ratio a value above 1 means the format took longer
+# than the reference - confirm that this is the intended meaning of "speedup".
+df["BiEllpack Legacy",      "CPU", "CSR speedup"]      = df["BiEllpack Legacy",      "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["BiEllpack Legacy",      "GPU", "cuSparse speedup"] = df["BiEllpack Legacy",      "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["CSR",                   "GPU", "cuSparse speedup"] = df["CSR",                   "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["CSR Legacy",            "GPU", "cuSparse speedup"] = df["CSR Legacy",            "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["ChunkedEllpack Legacy", "CPU", "CSR speedup"]      = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["Ellpack Legacy",        "CPU", "CSR speedup"]      = df["Ellpack Legacy",        "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["Ellpack Legacy",        "GPU", "cuSparse speedup"] = df["Ellpack Legacy",        "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["Ellpack",               "CPU", "CSR speedup"]      = df["Ellpack",               "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["Ellpack",               "GPU", "cuSparse speedup"] = df["Ellpack",               "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["SlicedEllpack Legacy",  "CPU", "CSR speedup"]      = df["SlicedEllpack Legacy",  "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["SlicedEllpack Legacy",  "GPU", "cuSparse speedup"] = df["SlicedEllpack Legacy",  "GPU", "time"] / df["cuSparse", "GPU", "time"]
+df["SlicedEllpack",         "CPU", "CSR speedup"]      = df["SlicedEllpack",         "CPU", "time"] / df["CSR",      "CPU", "time"]
+df["SlicedEllpack",         "GPU", "cuSparse speedup"] = df["SlicedEllpack",         "GPU", "time"] / df["cuSparse", "GPU", "time"]
+# Add speedup compared to legacy formats
+# Same ratio, with the legacy implementation of the same format as the reference.
+df["CSR",                   "GPU", "Legacy speedup"]   = df["CSR",                   "GPU", "time"] / df["CSR Legacy",           "GPU", "time"]
+df["CSR",                   "CPU", "Legacy speedup"]   = df["CSR",                   "CPU", "time"] / df["CSR Legacy",           "CPU", "time"]
+df["Ellpack",               "GPU", "Legacy speedup"]   = df["Ellpack",               "GPU", "time"] / df["Ellpack Legacy",       "GPU", "time"]
+df["Ellpack",               "CPU", "Legacy speedup"]   = df["Ellpack",               "CPU", "time"] / df["Ellpack Legacy",       "CPU", "time"]
+df["SlicedEllpack",         "GPU", "Legacy speedup"]   = df["SlicedEllpack",         "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"]
+df["SlicedEllpack",         "CPU", "Legacy speedup"]   = df["SlicedEllpack",         "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"]
+print( "Exporting data frame to log.html..." )
+pandas.options.display.float_format = '{:,.4f}'.format
+# extract columns of reference formats on GPU
+print( "Preparing data for graph analysis..." )
+# GPU bandwidth series that end up as the columns of cusparse.gplt.
+# The cuSparse series is addressed by its (format, device, quantity) key like
+# all the others; no flat 'cuSparse-bandwidth' column is created in this script.
+cuSparse_list = df["cuSparse", "GPU", "bandwidth"].tolist()
+cuSparse_ellpack_gpu_list = df["Ellpack", "GPU", "bandwidth"].tolist()
+cuSparse_ellpack_legacy_gpu_list = df["Ellpack Legacy", "GPU", "bandwidth"].tolist()
+cuSparse_sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist()
+cuSparse_sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist()
+cuSparse_chunked_ellpack_legacy_gpu_list = df["ChunkedEllpack Legacy", "GPU", "bandwidth"].tolist()
+cuSparse_bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist()
+# GPU bandwidth series for the Ellpack vs. Ellpack Legacy comparison
+ellpack_gpu_list = df["Ellpack", "GPU", "bandwidth"].tolist()
+ellpack_legacy_gpu_list = df["Ellpack Legacy", "GPU", "bandwidth"].tolist()
+# GPU bandwidth series for the SlicedEllpack vs. SlicedEllpack Legacy comparison
+sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist()
+sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist()
+print( "Writing gnuplot files..." )
+# One line per matrix: index, cuSparse bandwidth, then the bandwidth of the other formats.
+# `with` closes (and flushes) the file so that gnuplot later reads complete data.
+with open( "cusparse.gplt", "w" ) as cuSparse_file:
+   for i, x in enumerate( cuSparse_list ):
+      # matrices without a cuSparse or an Ellpack / Ellpack Legacy value are left out
+      if str( x ) == "nan":
+         continue
+      if str( cuSparse_ellpack_gpu_list[ i ] ) == "nan" or str( cuSparse_ellpack_legacy_gpu_list[ i ] ) == "nan":
+         continue
+      cuSparse_file.write( f"{i+1} {x} " )
+      cuSparse_file.write( f"{cuSparse_ellpack_gpu_list[ i ]} {cuSparse_ellpack_legacy_gpu_list[ i ]} " )
+      cuSparse_file.write( f"{cuSparse_sliced_ellpack_gpu_list[ i ]} {cuSparse_sliced_ellpack_legacy_gpu_list[ i ]} " )
+      cuSparse_file.write( f"{cuSparse_chunked_ellpack_legacy_gpu_list[ i ]} {cuSparse_bi_ellpack_legacy_gpu_list[ i ]}\n" )
+# One line per matrix: index, Ellpack bandwidth, Ellpack Legacy bandwidth.
+# `with` closes (and flushes) the file so that gnuplot later reads complete data.
+with open( "ellpack.gplt", "w" ) as ellpack_file:
+   for i, x in enumerate( ellpack_gpu_list ):
+      # matrices with a missing value in either series are left out
+      if str( x ) == "nan" or str( ellpack_legacy_gpu_list[ i ] ) == "nan":
+         continue
+      ellpack_file.write( f"{i+1} {x} {ellpack_legacy_gpu_list[ i ]}\n" )
+# One line per matrix: index, SlicedEllpack bandwidth, SlicedEllpack Legacy bandwidth.
+# `with` closes (and flushes) the file so that gnuplot later reads complete data.
+with open( "sliced-ellpack.gplt", "w" ) as sliced_ellpack_file:
+   for i, x in enumerate( sliced_ellpack_gpu_list ):
+      # matrices with a missing value in either series are left out
+      if str( x ) == "nan" or str( sliced_ellpack_legacy_gpu_list[ i ] ) == "nan":
+         continue
+      sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" )
+print( "Generating Gnuplot file..." )
+gnuplot_file = open( "gnuplot.gplt", "w" )
+# NOTE: """...""" allows multi-line strings, r"..." disables backslash-escaping (so a single \ is just a \ in the output)
+gnuplot_file.write( r"""
+set terminal postscript lw 3 20 color
+set grid
+set xlabel 'Matrix'
+set xtics 250
+set ylabel 'Bandwidth GB/sec'
+set output 'ellpack-vs-cusparse.eps'
+plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \
+     'cusparse.gplt' using 1:3 title 'Ellpack' with dots linewidth 2 lt rgb 'green', \
+     'cusparse.gplt' using 1:4 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'
+# TODO: formatting like ^
+gnuplot_file.write( "set output 'sliced-ellpack-vs-cusparse.eps'\n" )
+gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
+gnuplot_file.write( "     'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
+gnuplot_file.write( "     'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
+gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" )
+gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
+#gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" )
+gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
+gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" )
+gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" )
+#gnuplot_file.write( "     'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" )
+gnuplot_file.write( "     'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
+gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" )
+gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" )
+gnuplot_file.write( "     'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
+# This figure compares SlicedEllpack with its legacy version, so it reads
+# sliced-ellpack.gplt (index, SlicedEllpack, SlicedEllpack Legacy) and not ellpack.gplt.
+gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" )
+gnuplot_file.write( "plot 'sliced-ellpack.gplt' using 1:2 title 'Sliced Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" )
+gnuplot_file.write( "     'sliced-ellpack.gplt' using 1:3 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" )
+# Close (and thereby flush) every generated file before gnuplot reads it;
+# otherwise part of the buffered output may not be on disk yet.
+# Closing a file that is already closed is harmless.
+for generated_file in ( cuSparse_file, ellpack_file, sliced_ellpack_file, gnuplot_file ):
+    generated_file.close()
+print( "Executing Gnuplot ..." )
+os.system( "gnuplot gnuplot.gplt" )
+print( "Converting files to PDF ..." )
+os.system( "epstopdf --autorotate All ellpack-vs-cusparse.eps" )
+os.system( "epstopdf --autorotate All sliced-ellpack-vs-cusparse.eps" )
+os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" )
+os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" )
+os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" )
+os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" )
+print( "Deleting temporary files..." )
+# Remove the intermediate gnuplot inputs and the EPS figures.
+# os.remove needs no shell; a file that cannot be removed is reported and
+# skipped, so the cleanup stays best-effort.
+for temporary_file in ( "cusparse.gplt",
+                        "ellpack.gplt",
+                        "sliced-ellpack.gplt",
+                        "gnuplot.gplt",
+                        "ellpack-vs-cusparse.eps",
+                        "sliced-ellpack-vs-cusparse.eps",
+                        "chunked-ellpack-vs-cusparse.eps",
+                        "bi-ellpack-vs-cusparse.eps",
+                        "ellpack-vs-ellpack-legacy.eps",
+                        "sliced-ellpack-vs-sliced-ellpack-legacy.eps" ):
+    try:
+        os.remove( temporary_file )
+    except OSError as error:
+        print( f"Cannot remove {temporary_file}: {error}" )
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index 1166245110501aa6c8c66c5a81448ab82895b54d..a73385eb1aab92d095406976c23d1b4bdb711728 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -193,6 +193,22 @@ class Array
       Array( Array&& array );
+      /**
+       * \brief Copy constructor from array with different template parameters.
+       *
+       * The constructor is declared \e explicit, so an array with different
+       * template parameters is never converted implicitly.
+       *
+       * \tparam Value_ Value type of the input array.
+       * \tparam Device_ Device type of the input array.
+       * \tparam Index_ Index type of the input array.
+       * \tparam Allocator_ Allocator type of the input array.
+       * \param a the input array.
+       */
+      template< typename Value_,
+                typename Device_,
+                typename Index_,
+                typename Allocator_ >
+      explicit Array( const Array< Value_, Device_, Index_, Allocator_ >& a );
        * \brief Constructor which initializes the array by copying elements from
        * \ref std::initializer_list, e.g. `{...}`.
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index ab81db7aabc214ab66751905446849179aa1975a..f45b7370de7fb3152df2facd377dbced37ef0466 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -132,6 +132,20 @@ Array( const Array< Value, Device, Index, Allocator >& array,
    Algorithms::MemoryOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size );
+// Copy constructor from an array with different template parameters.
+template< typename Value,
+          typename Device,
+          typename Index,
+          typename Allocator >
+      template< typename Value_,
+                typename Device_,
+                typename Index_,
+                typename Allocator_ >
+Array< Value, Device, Index, Allocator >::
+Array( const Array< Value_, Device_, Index_, Allocator_ >& a )
+   // The whole copy is delegated to the assignment operator.
+   *this = a;
 template< typename Value,
           typename Device,
           typename Index,
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 02be7f099f1fd9446200d8e10340c5a6bdc6afed..b94db8c88b5df8b14c719e099a7cc525682e6d45 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -28,6 +28,15 @@ CSRView()
+// Constructs the view from an existing view of the offsets; the offsets view
+// passed in is stored in the 'offsets' member.
+template< typename Device,
+          typename Index >
+CSRView< Device, Index >::
+CSRView( const OffsetsView& offsets_view )
+   : offsets( offsets_view )
 template< typename Device,
           typename Index >
diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.h b/src/TNL/Containers/Segments/ChunkedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..93580a9cdc7a877a259f84959754b7dd302cab29
--- /dev/null
+++ b/src/TNL/Containers/Segments/ChunkedEllpack.h
@@ -0,0 +1,163 @@
+                          ChunkedEllpack.h -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/ChunkedEllpackView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+/**
+ * \brief Segments stored in the Chunked Ellpack format.
+ *
+ * The segments are grouped into slices (see resolveSliceSizes) and each slice
+ * is divided into \e chunksInSlice chunks which are distributed among the
+ * segments of the slice (see setSlice).
+ *
+ * \tparam Device is the device the containers are allocated on.
+ * \tparam Index is the type used for indexing.
+ * \tparam IndexAllocator is the allocator of the index containers.
+ * \tparam RowMajorOrder is forwarded to the view and segment-view types; by
+ *         default it is \e true for the host device and \e false otherwise.
+ */
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class ChunkedEllpack
+   public:
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder >;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >;
+      using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
+      using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
+      //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >;
+      using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
+      using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
+      ChunkedEllpack() = default;
+      /**
+       * \brief Creates the segments and sets their sizes.
+       */
+      ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+      ChunkedEllpack( const ChunkedEllpack& segments );
+      // NOTE(review): the parameter is a const rvalue reference, so the
+      // implementation can only copy the members, not move them.
+      ChunkedEllpack( const ChunkedEllpack&& segments );
+      static String getSerializationType();
+      static String getSegmentsType();
+      ViewType getView();
+      ConstViewType getConstView() const;
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+      /**
+       * \brief Returns the number of segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+      /**
+       * \brief Returns the number of elements allocated for all slices.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When it is true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+      ChunkedEllpack& operator=( const ChunkedEllpack& source ) = default;
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+      ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source );
+      void save( File& file ) const;
+      void load( File& file );
+      void printStructure( std::ostream& str ); // TODO const;
+   protected:
+      /**
+       * \brief Splits the segments into slices and stores the size and the
+       * first segment of each slice.
+       */
+      template< typename SegmentsSizes >
+      void resolveSliceSizes( SegmentsSizes& rowLengths );
+      /**
+       * \brief Distributes the chunks of one slice among its segments and
+       * adds the storage of the slice to \e elementsToAllocation.
+       */
+      template< typename SegmentsSizes >
+      bool setSlice( SegmentsSizes& rowLengths,
+                     const IndexType sliceIdx,
+                     IndexType& elementsToAllocation );
+      IndexType size = 0, storageSize = 0;
+      IndexType chunksInSlice = 256, desiredChunkSize = 16;
+      /**
+       * For each segment, this keeps index of the slice which contains the
+       * segment.
+       */
+      OffsetsHolder rowToSliceMapping;
+      /**
+       * For each row, this keeps index of the first chunk within a slice.
+       */
+      OffsetsHolder rowToChunkMapping;
+      /**
+       * For each chunk, this keeps index of the segment the chunk belongs to.
+       */
+      OffsetsHolder chunksToSegmentsMapping;
+      /**
+       * Keeps index of the first segment index.
+       */
+      OffsetsHolder rowPointers;
+      ChunkedEllpackSliceInfoContainer slices;
+      // Initialized here so that a default-constructed object never exposes an
+      // indeterminate slice count through getView() or save().
+      IndexType numberOfSlices = 0;
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+      friend class ChunkedEllpack;
+      } // namespace Segments
+   }  // namespace Containers
+} // namespace TNL
+#include <TNL/Containers/Segments/ChunkedEllpack.hpp>
diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.hpp b/src/TNL/Containers/Segments/ChunkedEllpack.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d0cf6fe7ea1d517feac4edcd4071a22c28f84ae
--- /dev/null
+++ b/src/TNL/Containers/Segments/ChunkedEllpack.hpp
@@ -0,0 +1,508 @@
+                          ChunkedEllpack.hpp -  description
+                             -------------------
+    begin                : Jan 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/ChunkedEllpack.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+/**
+ * \brief Creates the segments and sets their sizes.
+ *
+ * \param sizes holds the size of each segment.
+ */
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
+   // chunksInSlice and desiredChunkSize deliberately keep their in-class
+   // defaults. Zeroing them here makes setSlice() start from a negative number
+   // of free chunks and leaves segments with no chunk at all.
+   : size( 0 ), storageSize( 0 )
+   this->setSegmentsSizes( sizes );
+/**
+ * \brief Copy constructor - copies the scalar parameters and all containers.
+ *
+ * The initializers are listed in the order in which the members are declared
+ * in the class, which is the order in which they are initialized.
+ */
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+ChunkedEllpack( const ChunkedEllpack& chunkedEllpack )
+   : size( chunkedEllpack.size ),
+     storageSize( chunkedEllpack.storageSize ),
+     chunksInSlice( chunkedEllpack.chunksInSlice ),
+     desiredChunkSize( chunkedEllpack.desiredChunkSize ),
+     rowToSliceMapping( chunkedEllpack.rowToSliceMapping ),
+     rowToChunkMapping( chunkedEllpack.rowToChunkMapping ),
+     chunksToSegmentsMapping( chunkedEllpack.chunksToSegmentsMapping ),
+     rowPointers( chunkedEllpack.rowPointers ),
+     slices( chunkedEllpack.slices ),
+     numberOfSlices( chunkedEllpack.numberOfSlices )
+/**
+ * \brief Constructor from a const rvalue reference.
+ *
+ * NOTE(review): because the parameter is const, all members are copied rather
+ * than moved. Changing this needs a matching change of the declaration.
+ *
+ * The initializers are listed in the order in which the members are declared
+ * in the class, which is the order in which they are initialized.
+ */
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack )
+   : size( chunkedEllpack.size ),
+     storageSize( chunkedEllpack.storageSize ),
+     chunksInSlice( chunkedEllpack.chunksInSlice ),
+     desiredChunkSize( chunkedEllpack.desiredChunkSize ),
+     rowToSliceMapping( chunkedEllpack.rowToSliceMapping ),
+     rowToChunkMapping( chunkedEllpack.rowToChunkMapping ),
+     chunksToSegmentsMapping( chunkedEllpack.chunksToSegmentsMapping ),
+     rowPointers( chunkedEllpack.rowPointers ),
+     slices( chunkedEllpack.slices ),
+     numberOfSlices( chunkedEllpack.numberOfSlices )
+// Returns the string identifying the type in serialized data. The string does
+// not depend on the device ("[any_device]" is used in its place); it contains
+// only the serialization type of the index.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+   return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+// Returns the segments type string as reported by the view type.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+   return ViewType::getSegmentsType();
+// Creates a view of these segments: the scalar parameters are passed by value
+// and every container is passed as a view obtained by getView().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ViewType
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+   return ViewType( size, storageSize, chunksInSlice, desiredChunkSize,
+                    rowToChunkMapping.getView(),
+                    rowToSliceMapping.getView(),
+                    chunksToSegmentsMapping.getView(),
+                    rowPointers.getView(),
+                    slices.getView(),
+                    numberOfSlices );
+// Creates a constant view of these segments: the scalar parameters are passed
+// by value and every container is passed as a view obtained by getConstView().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ConstViewType
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getConstView() const
+   return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize,
+                         rowToChunkMapping.getConstView(),
+                         rowToSliceMapping.getConstView(),
+                         chunksToSegmentsMapping.getConstView(),
+                         rowPointers.getConstView(),
+                         slices.getConstView(),
+                         numberOfSlices );
+// Splits the segments into slices and sets numberOfSlices.
+//
+// Consecutive segments are added to the current slice until the sum of their
+// sizes reaches chunksInSlice * desiredChunkSize, the slice contains
+// chunksInSlice segments, or there are no more segments. For each slice, the
+// number of its segments ('size') and the index of its first segment
+// ('firstSegment') are stored. The sum of the segment sizes is stored
+// temporarily in 'pointer'; setSlice() reads it and then overwrites it.
+//
+// segmentsSizes holds the size of each segment.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename SegmentsSizes >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+resolveSliceSizes( SegmentsSizes& segmentsSizes )
+   /****
+    * Iterate over rows and allocate slices so that each slice has
+    * approximately the same number of allocated elements
+    */
+   const IndexType desiredElementsInSlice =
+            this->chunksInSlice * this->desiredChunkSize;
+   IndexType segmentIdx( 0 ),
+             sliceSize( 0 ),
+             allocatedElementsInSlice( 0 );
+   numberOfSlices = 0;
+   while( segmentIdx < segmentsSizes.getSize() )
+   {
+      /****
+       * Add one row to the current slice until we reach the desired
+       * number of elements in a slice.
+       */
+      allocatedElementsInSlice += segmentsSizes[ segmentIdx ];
+      sliceSize++;
+      segmentIdx++;
+      // Keep filling the current slice only while it is below the desired
+      // number of elements, there are segments left and the slice still has
+      // fewer segments than chunks.
+      if( allocatedElementsInSlice < desiredElementsInSlice  )
+          if( segmentIdx < segmentsSizes.getSize() && sliceSize < chunksInSlice ) continue;
+      TNL_ASSERT( sliceSize >0, );
+      // Close the current slice and start a new one.
+      this->slices[ numberOfSlices ].size = sliceSize;
+      this->slices[ numberOfSlices ].firstSegment = segmentIdx - sliceSize;
+      this->slices[ numberOfSlices ].pointer = allocatedElementsInSlice; // this is only temporary
+      sliceSize = 0;
+      numberOfSlices++;
+      allocatedElementsInSlice = 0;
+   }
+// Distributes the chunks of one slice among its segments and sets up the
+// slice info.
+//
+// rowLengths holds the size of each segment, sliceIndex is the index of the
+// slice to set up (its 'size', 'firstSegment' and temporary 'pointer' must
+// already be set by resolveSliceSizes()) and elementsToAllocation is increased
+// by the storage of this slice, i.e. chunksInSlice * chunk size.
+// Always returns true.
+//
+// NOTE(review): totalAddedChunks and maxRowLength are declared as int while
+// the rest of the computation uses IndexType.
+// NOTE(review): for a slice consisting only of empty segments the computed
+// chunk size appears to be zero, which would trigger TNL_ASSERT_GT below -
+// confirm against roundUpDivision.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename SegmentsSizes >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+setSlice( SegmentsSizes& rowLengths,
+          const IndexType sliceIndex,
+          IndexType& elementsToAllocation )
+   /****
+    * Now, compute the number of chunks per each row.
+    * Each row get one chunk by default.
+    * Then each row will get additional chunks w.r. to the
+    * number of the elements in the row. If there are some
+    * free chunks left, repeat it again.
+    */
+   const IndexType sliceSize = this->slices[ sliceIndex ].size;
+   const IndexType sliceBegin = this->slices[ sliceIndex ].firstSegment;
+   const IndexType allocatedElementsInSlice = this->slices[ sliceIndex ].pointer;
+   const IndexType sliceEnd = sliceBegin + sliceSize;
+   IndexType freeChunks = this->chunksInSlice - sliceSize;
+   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
+      this->rowToChunkMapping.setElement( i, 1 );
+   int totalAddedChunks( 0 );
+   int maxRowLength( rowLengths[ sliceBegin ] );
+   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
+   {
+      // Share of this row in all elements of the slice; the free chunks are
+      // distributed proportionally to it (the product is truncated).
+      double rowRatio( 0.0 );
+      if( allocatedElementsInSlice != 0 )
+         rowRatio = ( double ) rowLengths[ i ] / ( double ) allocatedElementsInSlice;
+      const IndexType addedChunks = freeChunks * rowRatio;
+      totalAddedChunks += addedChunks;
+      this->rowToChunkMapping[ i ] += addedChunks;
+      if( maxRowLength < rowLengths[ i ] )
+         maxRowLength = rowLengths[ i ];
+   }
+   freeChunks -= totalAddedChunks;
+   // The chunks left over after the proportional distribution are given, one
+   // at a time, to the rows having the maximal length.
+   while( freeChunks )
+      for( IndexType i = sliceBegin; i < sliceEnd && freeChunks; i++ )
+         if( rowLengths[ i ] == maxRowLength )
+         {
+            this->rowToChunkMapping[ i ]++;
+            freeChunks--;
+         }
+   /****
+    * Compute the chunk size
+    */
+   IndexType maxChunkInSlice( 0 );
+   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
+   {
+      TNL_ASSERT_NE( this->rowToChunkMapping[ i ], 0, "" );
+      maxChunkInSlice = TNL::max( maxChunkInSlice,
+                              roundUpDivision( rowLengths[ i ], this->rowToChunkMapping[ i ] ) );
+   }
+   TNL_ASSERT_GT( maxChunkInSlice, 0, "" );
+   /****
+    * Set-up the slice info.
+    */
+   this->slices[ sliceIndex ].chunkSize = maxChunkInSlice;
+   this->slices[ sliceIndex ].pointer = elementsToAllocation;
+   elementsToAllocation += this->chunksInSlice * maxChunkInSlice;
+   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
+      this->rowToSliceMapping[ i ] = sliceIndex;
+   for( IndexType i = sliceBegin; i < sliceEnd; i++ )
+   {
+      // Storage size of row i (chunk size times number of its chunks); the
+      // caller turns these sizes into offsets by rowPointers.scan().
+      this->rowPointers[ i + 1 ] = maxChunkInSlice*rowToChunkMapping[ i ];
+      TNL_ASSERT( this->rowPointers[ i ] >= 0,
+                 std::cerr << "this->rowPointers[ i ] = " << this->rowPointers[ i ] );
+      TNL_ASSERT( this->rowPointers[ i + 1 ] >= 0,
+                 std::cerr << "this->rowPointers[ i + 1 ] = " << this->rowPointers[ i + 1 ] );
+   }
+   /****
+    * Finish the row to chunk mapping by computing the prefix sum.
+    */
+   for( IndexType j = sliceBegin + 1; j < sliceEnd; j++ )
+      rowToChunkMapping[ j ] += rowToChunkMapping[ j - 1 ];
+   return true;
+// Sets the sizes of the segments and builds the whole structure.
+//
+// On the host the structure is built directly: the segments are split into
+// slices, every slice is set up and the mapping of chunks to segments is
+// filled. For any other device the structure is built in host segments first
+// and then assigned to this object.
+//
+// segmentsSizes holds the size of each segment.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename SizesHolder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+setSegmentsSizes( const SizesHolder& segmentsSizes )
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      this->size = segmentsSizes.getSize();
+      // One slice info per segment is allocated; only the first
+      // numberOfSlices entries are filled by resolveSliceSizes().
+      this->slices.setSize( this->size );
+      this->rowToChunkMapping.setSize( this->size );
+      this->rowToSliceMapping.setSize( this->size );
+      this->rowPointers.setSize( this->size + 1 );
+      this->resolveSliceSizes( segmentsSizes );
+      this->rowPointers.setElement( 0, 0 );
+      this->storageSize = 0;
+      for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ )
+         this->setSlice( segmentsSizes, sliceIndex, storageSize );
+      // setSlice() stored the storage size of each row in rowPointers;
+      // presumably scan() is a prefix sum turning them into offsets - confirm.
+      this->rowPointers.scan();
+      IndexType chunksCount = this->numberOfSlices * this->chunksInSlice;
+      this->chunksToSegmentsMapping.setSize( chunksCount );
+      IndexType chunkIdx( 0 );
+      for( IndexType segmentIdx = 0; segmentIdx < this->size; segmentIdx++ )
+      {
+         const IndexType& sliceIdx = rowToSliceMapping[ segmentIdx ];
+         // rowToChunkMapping holds a prefix sum within each slice, so the
+         // first chunk of the first segment of a slice is zero.
+         IndexType firstChunkOfSegment( 0 );
+         if( segmentIdx != slices[ sliceIdx ].firstSegment )
+               firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];
+         const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
+         const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
+         for( IndexType i = 0; i < segmentChunksCount; i++ )
+            this->chunksToSegmentsMapping[ chunkIdx++ ] = segmentIdx;
+      }
+   }
+   else
+   {
+      ChunkedEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< Index >, RowMajorOrder > hostSegments;
+      Containers::Vector< IndexType, Devices::Host, IndexType > hostSegmentsSizes( segmentsSizes );
+      hostSegments.setSegmentsSizes( hostSegmentsSizes );
+      *this = hostSegments;
+   }
+// Returns the number of segments. The class stores it in the member 'size'
+// (set in setSegmentsSizes()); there is no member named 'segmentsCount'.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getSegmentsCount() const
+   return this->size;
+// Returns the size of the segment with index segmentIdx. The computation is
+// delegated to details::ChunkedEllpack::getSegmentSize, which gets views of
+// the row-to-slice mapping, the slices and the row-to-chunk mapping.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getSegmentSize( const IndexType segmentIdx ) const
+   return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize(
+      rowToSliceMapping.getView(),
+      slices.getView(),
+      rowToChunkMapping.getView(),
+      segmentIdx );
+// Returns the value of the member 'size', i.e. the number of segments set in
+// setSegmentsSizes().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getSize() const
+   return this->size;
+// Returns the value of the member 'storageSize', i.e. the number of elements
+// allocated for all slices as accumulated by setSlice().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getStorageSize() const
+   return this->storageSize;
+// Returns the global index of the element with local index localIdx in the
+// segment with index segmentIdx. The computation is delegated to
+// details::ChunkedEllpack::getGlobalIndex.
+// NOTE(review): unlike getSegmentSize(), the containers are passed directly
+// here, not as views - confirm this matches the signature of the helper.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx,
+         localIdx );
+// Returns a segment view (SegmentViewType) for the segment with index segmentIdx.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+// Calls forSegments of the constant view of these segments with the range
+// given by 'first' and 'last', the function 'f' and the additional arguments
+// 'args'.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename Function, typename... Args >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+   this->getConstView().forSegments( first, last, f, args... );
+// Calls forSegments() for the range from 0 to getSegmentsCount().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename Function, typename... Args >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+forAll( Function& f, Args... args ) const
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+// Calls segmentsReduction of the constant view of these segments; all
+// arguments are passed on unchanged.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+   this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... );
+// Calls segmentsReduction() for the range from 0 to getSegmentsCount().
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+// Assignment of segments with different template parameters (used, e.g., by
+// setSegmentsSizes() to copy segments built on the host). All scalar
+// parameters and all containers of 'source' are assigned member by member.
+// Returns a reference to this object.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+   template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >&
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source )
+   this->size = source.size;
+   this->storageSize = source.storageSize;
+   this->chunksInSlice = source.chunksInSlice;
+   this->desiredChunkSize = source.desiredChunkSize;
+   this->rowToChunkMapping = source.rowToChunkMapping;
+   this->rowToSliceMapping = source.rowToSliceMapping;
+   this->rowPointers = source.rowPointers;
+   this->chunksToSegmentsMapping = source.chunksToSegmentsMapping;
+   this->slices = source.slices;
+   this->numberOfSlices = source.numberOfSlices;
+   return *this;
+// Saves the segments into a file: first the four scalar parameters, then the
+// containers and finally the number of slices. load() must read the data in
+// exactly the same order.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+save( File& file ) const
+   file.save( &this->size );
+   file.save( &this->storageSize );
+   file.save( &this->chunksInSlice );
+   file.save( &this->desiredChunkSize );
+   file << this->rowToChunkMapping
+        << this->rowToSliceMapping
+        << this->rowPointers
+        << this->chunksToSegmentsMapping
+        << this->slices;
+   // The address is passed here, as in the calls above and in load().
+   file.save( &this->numberOfSlices );
+// Loads the segments from a file: first the four scalar parameters, then the
+// containers and finally the number of slices.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+load( File& file )
+   file.load( &this->size );
+   file.load( &this->storageSize );
+   file.load( &this->chunksInSlice );
+   file.load( &this->desiredChunkSize );
+   // The containers must be read in the order in which save() writes them:
+   // rowPointers comes before chunksToSegmentsMapping.
+   file >> this->rowToChunkMapping
+        >> this->rowToSliceMapping
+        >> this->rowPointers
+        >> this->chunksToSegmentsMapping
+        >> this->slices;
+   file.load( &this->numberOfSlices );
+// Prints the structure of the segments into the stream 'str' by calling
+// printStructure of the (non-constant) view.
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder >
+ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
+printStructure( std::ostream& str )
+   this->getView().printStructure( str );
+      } // namespace Segments
+   }  // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h
new file mode 100644
index 0000000000000000000000000000000000000000..9eba9dd6867fe023ba418d70ff2c616e5f1e1e3b
--- /dev/null
+++ b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h
@@ -0,0 +1,94 @@
+                          ChunkedEllpackSegmentView.h -  description
+                             -------------------
+    begin                : Mar 24, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+// View of one segment of Chunked Ellpack segments. Only the specializations
+// for RowMajorOrder equal to false and true are defined.
+template< typename Index,
+          bool RowMajorOrder = false >
+class ChunkedEllpackSegmentView;
+// Specialization for RowMajorOrder == false: the global index of an element
+// is the offset of the segment plus the local index of the element.
+template< typename Index >
+class ChunkedEllpackSegmentView< Index, false >
+   public:
+      using IndexType = Index;
+      // Creates the view from the offset and the size of the segment; the
+      // parameters chunkSize and chunksInSlice are not used here.
+      __cuda_callable__
+      ChunkedEllpackSegmentView( const IndexType offset,
+                                 const IndexType size,
+                                 const IndexType chunkSize,      // this is only for compatibility with the following specialization
+                                 const IndexType chunksInSlice ) // this one as well - both can be replaced when we could use constexprif in C++17
+      : segmentOffset( offset ), segmentSize( size ){};
+      __cuda_callable__
+      ChunkedEllpackSegmentView( const ChunkedEllpackSegmentView& view )
+      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ){};
+      // Returns the size of the segment.
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+      // Returns the global index of the element with the given local index;
+      // the local index must be smaller than the segment size.
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex;
+      };
+      protected:
+         IndexType segmentOffset, segmentSize;
+// Specialization for RowMajorOrder == true: the global index of an element is
+// computed from the index of the chunk containing the element and from the
+// position of the element within that chunk.
+template< typename Index >
+class ChunkedEllpackSegmentView< Index, true >
+   public:
+      using IndexType = Index;
+      // Creates the view from the offset and the size of the segment, the
+      // chunk size and the number of chunks in a slice.
+      __cuda_callable__
+      ChunkedEllpackSegmentView( const IndexType offset,
+                                 const IndexType size,
+                                 const IndexType chunkSize,
+                                 const IndexType chunksInSlice )
+      : segmentOffset( offset ), segmentSize( size ),
+        chunkSize( chunkSize ), chunksInSlice( chunksInSlice ){};
+      // Returns the size of the segment.
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+      // Returns the global index of the element with the given local index;
+      // the local index must be smaller than the segment size.
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIdx ) const
+      {
+         TNL_ASSERT_LT( localIdx, segmentSize, "Local index exceeds segment bounds." );
+         // Elements with the same position within their chunks are
+         // chunksInSlice apart; the chunk index selects the element among them.
+         const IndexType chunkIdx = localIdx / chunkSize;
+         const IndexType inChunkOffset = localIdx % chunkSize;
+         return segmentOffset + inChunkOffset * chunksInSlice + chunkIdx;
+      };
+      protected:
+         IndexType segmentOffset, segmentSize, chunkSize, chunksInSlice;
+      } //namespace Segments
+   } //namespace Containers
+} //namespace TNL
diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.h b/src/TNL/Containers/Segments/ChunkedEllpackView.h
new file mode 100644
index 0000000000000000000000000000000000000000..4b444d08473a2763c2f50a7af5bf8fc70b5fef0e
--- /dev/null
+++ b/src/TNL/Containers/Segments/ChunkedEllpackView.h
@@ -0,0 +1,215 @@
+                          ChunkedEllpackView.h -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <type_traits>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/ChunkedEllpackSegmentView.h>
+#include <TNL/Containers/Segments/details/ChunkedEllpack.h>
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+/**
+ * \brief View of segments stored in the ChunkedEllpack format.
+ *
+ * The class keeps views (OffsetsView and a view of the slice-info container)
+ * of the mapping arrays together with a few scalar parameters; assignment
+ * rebinds these views (see operator=).
+ *
+ * \tparam Device device on which the data are stored.
+ * \tparam Index type used for indexing.
+ * \tparam RowMajorOrder ordering of elements within a slice; defaults to
+ *         true when Device is Devices::Host.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class ChunkedEllpackView
+   public:
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >;
+      using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType;
+      using ViewType = ChunkedEllpackView;
+      // NOTE(review): ViewTemplate and ConstViewType do not forward RowMajorOrder,
+      // so they use the default ordering for the given device - confirm this is intended.
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = ChunkedEllpackView< Device_, Index_ >;
+      using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index > >;
+      using SegmentViewType = ChunkedEllpackSegmentView< IndexType >;
+      using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
+      using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
+      using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
+      using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType;
+      __cuda_callable__
+      ChunkedEllpackView() = default;
+      /**
+       * \brief Creates the view from scalar parameters and views of the mapping arrays.
+       *
+       * Note the argument order: size, storageSize, chunksInSlice,
+       * desiredChunkSize, four offset views, the slices view and numberOfSlices.
+       */
+      __cuda_callable__
+      ChunkedEllpackView( const IndexType size,
+                          const IndexType storageSize,
+                          const IndexType chunksInSlice,
+                          const IndexType desiredChunkSize,
+                          const OffsetsView& rowToChunkMapping,
+                          const OffsetsView& rowToSliceMapping,
+                          const OffsetsView& chunksToSegmentsMapping,
+                          const OffsetsView& rowPointers,
+                          const ChunkedEllpackSliceInfoContainerView& slices,
+                          const IndexType numberOfSlices );
+      /**
+       * \brief Same as the previous constructor, taking the views as rvalues.
+       *
+       * NOTE(review): the parameters are const rvalue references, so std::move
+       * applied to them in the definition cannot select a move constructor.
+       */
+      __cuda_callable__
+      ChunkedEllpackView( const IndexType size,
+                          const IndexType storageSize,
+                          const IndexType chunksInSlice,
+                          const IndexType desiredChunkSize,
+                          const OffsetsView&& rowToChunkMapping,
+                          const OffsetsView&& rowToSliceMapping,
+                          const OffsetsView&& chunksToSegmentsMapping,
+                          const OffsetsView&& rowPointers,
+                          const ChunkedEllpackSliceInfoContainerView&& slices,
+                          const IndexType numberOfSlices );
+      __cuda_callable__
+      ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view );
+      __cuda_callable__
+      ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view );
+      static String getSerializationType();
+      static String getSegmentsType();
+      __cuda_callable__
+      ViewType getView();
+      __cuda_callable__
+      ConstViewType getConstView() const;
+      /**
+       * \brief Returns the number of segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+      /***
+       * \brief Returns the size of the segment number \e segmentIdx.
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+      /***
+       * \brief Returns number of elements managed by all segments.
+       *
+       * NOTE(review): the definition returns the same member (size) as
+       * getSegmentsCount() - confirm which meaning is intended.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+      /***
+       * \brief Returns number of elements that need to be allocated.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+      /**
+       * \brief Returns the position in the storage of element \e localIdx of
+       * segment \e segmentIdx.
+       */
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+      /**
+       * \brief Returns a segment view of the segment number \e segmentIdx.
+       */
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+      /***
+       * \brief Go over segments with indexes in [ first, last ) and for each
+       * segment element call 'f' as
+       * f( segmentIdx, localIdx, globalIdx, compute, args... ).
+       * 'compute' is a bool flag which is true when a segment is entered;
+       * the traversal of that segment stops as soon as the flag is false
+       * (so 'f' has to take it by reference to be able to stop the loop).
+       * The return value of 'f' is not used.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+      /**
+       * \brief Calls forSegments() for all segments, i.e. for the range
+       * [ 0, getSegmentsCount() ).
+       */
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+      /***
+       * \brief Go over segments with indexes in [ first, last ) and perform a
+       * reduction in each of them.
+       *
+       * 'fetch' is called as fetch( segmentIdx, localIdx, globalIdx, compute, args... ),
+       * 'reduction' as reduction( accumulator, fetchedValue ) and 'keeper' as
+       * keeper( segmentIdx, result ). 'zero' is the initial value of the accumulator.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+      /**
+       * \brief Calls segmentsReduction() for the range [ 0, getSegmentsCount() ).
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+      /**
+       * \brief Copies the scalar parameters and rebinds all views to those of \e view.
+       */
+      ChunkedEllpackView& operator=( const ChunkedEllpackView& view );
+      void save( File& file ) const;
+      void load( File& file );
+      /**
+       * \brief Prints the slices and the segment-to-slice/chunk mappings to \e str.
+       */
+      void printStructure( std::ostream& str ) const;
+   protected:
+#ifdef HAVE_CUDA
+      /**
+       * Device-side body of the reduction; called from the
+       * ChunkedEllpackSegmentsReductionKernel friend declared below.
+       */
+      template< typename Fetch,
+                typename Reduction,
+                typename ResultKeeper,
+                typename Real,
+                typename... Args >
+      __device__
+      void segmentsReductionKernel( IndexType gridIdx,
+                                    IndexType first,
+                                    IndexType last,
+                                    Fetch fetch,
+                                    Reduction reduction,
+                                    ResultKeeper keeper,
+                                    Real zero,
+                                    Args... args ) const;
+      IndexType size = 0, storageSize = 0, numberOfSlices = 0;
+      IndexType chunksInSlice = 256, desiredChunkSize = 16;
+      /**
+       * For each segment, this keeps index of the slice which contains the
+       * segment.
+       */
+      OffsetsView rowToSliceMapping;
+      /**
+       * For each segment, this keeps the index (within the slice) of the chunk
+       * following the last chunk of the segment. The first chunk of a segment
+       * is the value stored for the previous segment, or 0 for the first
+       * segment of a slice.
+       */
+      OffsetsView rowToChunkMapping;
+      /**
+       * For each chunk, indexed by sliceIdx * chunksInSlice + index of the
+       * chunk within the slice, this keeps index of the segment it belongs to.
+       */
+      OffsetsView chunksToSegmentsMapping;
+      /**
+       * Keeps index of the first segment index.
+       */
+      OffsetsView rowPointers;
+      /**
+       * Description (size, chunkSize, firstSegment, pointer) of each slice.
+       */
+      ChunkedEllpackSliceInfoContainerView slices;
+#ifdef HAVE_CUDA
+      // The kernel needs access to the protected segmentsReductionKernel().
+      template< typename View_,
+                typename Index_,
+                typename Fetch_,
+                typename Reduction_,
+                typename ResultKeeper_,
+                typename Real_,
+                typename... Args_ >
+      friend __global__
+      void ChunkedEllpackSegmentsReductionKernel( View_ chunkedEllpack,
+                                                  Index_ gridIdx,
+                                                  Index_ first,
+                                                  Index_ last,
+                                                  Fetch_ fetch,
+                                                  Reduction_ reduction,
+                                                  ResultKeeper_ keeper,
+                                                  Real_ zero,
+                                                  Args_... args );
+      } // namespace Segments
+   }  // namespace Containers
+} // namespace TNL
+#include <TNL/Containers/Segments/ChunkedEllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.hpp b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b3b151624f500282451ff0ea4a643c9011790b6c
--- /dev/null
+++ b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp
@@ -0,0 +1,654 @@
+                          ChunkedEllpackView.hpp -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/ChunkedEllpackView.h>
+//#include <TNL/Containers/Segments/details/ChunkedEllpack.h>
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+#ifdef HAVE_CUDA
+/**
+ * \brief Free-function entry point of the segments reduction on CUDA.
+ *
+ * It only forwards all its arguments to View::segmentsReductionKernel().
+ *
+ * \param chunkedEllpack view (taken by value) on which the reduction runs.
+ * \param gridIdx index of the launched grid.
+ * \param first, last range of segments requested by the caller.
+ * \param fetch, reduction, keeper, zero, args forwarded unchanged.
+ */
+template< typename View,
+          typename Index,
+          typename Fetch,
+          typename Reduction,
+          typename ResultKeeper,
+          typename Real,
+          typename... Args >
+void ChunkedEllpackSegmentsReductionKernel( View chunkedEllpack,
+                                            Index gridIdx,
+                                            Index first,
+                                            Index last,
+                                            Fetch fetch,
+                                            Reduction reduction,
+                                            ResultKeeper keeper,
+                                            Real zero,
+                                            Args... args )
+   chunkedEllpack.segmentsReductionKernel( gridIdx, first, last, fetch, reduction, keeper, zero, args... );
+/**
+ * \brief Constructor copying the given scalar parameters and views.
+ *
+ * The initializer list follows the declaration order of the members
+ * (rowToSliceMapping before rowToChunkMapping), which differs from the
+ * order of the parameters.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+ChunkedEllpackView( const IndexType size,
+                    const IndexType storageSize,
+                    const IndexType chunksInSlice,
+                    const IndexType desiredChunkSize,
+                    const OffsetsView& rowToChunkMapping,
+                    const OffsetsView& rowToSliceMapping,
+                    const OffsetsView& chunksToSegmentsMapping,
+                    const OffsetsView& rowPointers,
+                    const ChunkedEllpackSliceInfoContainerView& slices,
+                    const IndexType numberOfSlices )
+: size( size ),
+  storageSize( storageSize ),
+  numberOfSlices( numberOfSlices ),
+  chunksInSlice( chunksInSlice ),
+  desiredChunkSize( desiredChunkSize ),
+  rowToSliceMapping( rowToSliceMapping ),
+  rowToChunkMapping( rowToChunkMapping ),
+  chunksToSegmentsMapping( chunksToSegmentsMapping ),
+  rowPointers( rowPointers ),
+  slices( slices )
+/**
+ * \brief Constructor taking the views as rvalue references.
+ *
+ * NOTE(review): the view parameters are declared `const ...&&`; std::move on
+ * a const object yields a const rvalue, so the members are initialized by
+ * their copy constructors, not by move constructors.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+ChunkedEllpackView( const IndexType size,
+                    const IndexType storageSize,
+                    const IndexType chunksInSlice,
+                    const IndexType desiredChunkSize,
+                    const OffsetsView&& rowToChunkMapping,
+                    const OffsetsView&& rowToSliceMapping,
+                    const OffsetsView&& chunksToSegmentsMapping,
+                    const OffsetsView&& rowPointers,
+                    const ChunkedEllpackSliceInfoContainerView&& slices,
+                    const IndexType numberOfSlices )
+: size( size ),
+  storageSize( storageSize ),
+  numberOfSlices( numberOfSlices ),
+  chunksInSlice( chunksInSlice ),
+  desiredChunkSize( desiredChunkSize ),
+  rowToSliceMapping( std::move( rowToSliceMapping ) ),
+  rowToChunkMapping( std::move( rowToChunkMapping ) ),
+  chunksToSegmentsMapping( std::move( chunksToSegmentsMapping ) ),
+  rowPointers( std::move( rowPointers ) ),
+  slices( std::move( slices ) )
+/**
+ * \brief Copy constructor; copies all scalar parameters and all views
+ * member by member from \e chunked_ellpack_view.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view )
+: size( chunked_ellpack_view.size ),
+  storageSize( chunked_ellpack_view.storageSize ),
+  numberOfSlices( chunked_ellpack_view.numberOfSlices ),
+  chunksInSlice( chunked_ellpack_view.chunksInSlice ),
+  desiredChunkSize( chunked_ellpack_view.desiredChunkSize ),
+  rowToSliceMapping( chunked_ellpack_view.rowToSliceMapping ),
+  rowToChunkMapping( chunked_ellpack_view.rowToChunkMapping ),
+  chunksToSegmentsMapping( chunked_ellpack_view.chunksToSegmentsMapping ),
+  rowPointers( chunked_ellpack_view.rowPointers ),
+  slices( chunked_ellpack_view.slices )
+/**
+ * \brief Constructor from an rvalue view.
+ *
+ * NOTE(review): the parameter is a const rvalue reference, so the
+ * std::move calls below produce const rvalues and the views are copied
+ * rather than moved.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view )
+: size( chunked_ellpack_view.size ),
+  storageSize( chunked_ellpack_view.storageSize ),
+  numberOfSlices( chunked_ellpack_view.numberOfSlices ),
+  chunksInSlice( chunked_ellpack_view.chunksInSlice ),
+  desiredChunkSize( chunked_ellpack_view.desiredChunkSize ),
+  rowToSliceMapping( std::move( chunked_ellpack_view.rowToSliceMapping ) ),
+  rowToChunkMapping( std::move( chunked_ellpack_view.rowToChunkMapping ) ),
+  chunksToSegmentsMapping( std::move( chunked_ellpack_view.chunksToSegmentsMapping ) ),
+  rowPointers( std::move( chunked_ellpack_view.rowPointers ) ),
+  slices( std::move( chunked_ellpack_view.slices ) )
+/**
+ * \brief Returns the string "ChunkedEllpack< [any_device], <index type> >",
+ * where the index type comes from TNL::getSerializationType< IndexType >().
+ *
+ * NOTE(review): the line with the function name is not visible in this
+ * excerpt; presumably this is getSerializationType() - confirm.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+   return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+/**
+ * \brief Returns the string "ChunkedEllpack".
+ *
+ * NOTE(review): the line with the function name is not visible in this
+ * excerpt; presumably this is getSegmentsType() - confirm.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+   return "ChunkedEllpack";
+/**
+ * \brief Returns a view of the same segments.
+ *
+ * The arguments follow the order of the constructor parameters: size,
+ * storageSize, chunksInSlice, desiredChunkSize, the four offset views, the
+ * slices view and numberOfSlices. storageSize must not be left out -
+ * otherwise every following argument is shifted by one position and the
+ * call does not match the ten-parameter constructor.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+typename ChunkedEllpackView< Device, Index, RowMajorOrder >::ViewType
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+   return ViewType( size, storageSize, chunksInSlice, desiredChunkSize,
+                    rowToChunkMapping.getView(),
+                    rowToSliceMapping.getView(),
+                    chunksToSegmentsMapping.getView(),
+                    rowPointers.getView(),
+                    slices.getView(),
+                    numberOfSlices );
+/**
+ * \brief Returns a constant view of the same segments.
+ *
+ * The arguments follow the order of the constructor parameters: size,
+ * storageSize, chunksInSlice, desiredChunkSize, the four offset views, the
+ * slices view and numberOfSlices. storageSize must not be left out -
+ * otherwise every following argument is shifted by one position and the
+ * call does not match the ten-parameter constructor.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+typename ChunkedEllpackView< Device, Index, RowMajorOrder >::ConstViewType
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getConstView() const
+   return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize,
+                         rowToChunkMapping.getConstView(),
+                         rowToSliceMapping.getConstView(),
+                         chunksToSegmentsMapping.getConstView(),
+                         rowPointers.getConstView(),
+                         slices.getConstView(),
+                         numberOfSlices );
+/**
+ * \brief Returns the number of segments, i.e. the member `size`.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getSegmentsCount() const
+   return this->size;
+/**
+ * \brief Returns the size of the segment \e segmentIdx.
+ *
+ * Dispatches on DeviceType. For Devices::Host it calls
+ * details::ChunkedEllpack::getSegmentSizeDirect(). For Devices::Cuda both
+ * getSegmentSizeDirect() and getSegmentSize() appear after the
+ * __CUDA_ARCH__ check.
+ * NOTE(review): the preprocessor lines separating and closing the two CUDA
+ * returns are not visible in this excerpt - confirm against the full file.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getSegmentSize( const IndexType segmentIdx ) const
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         segmentIdx );
+   if( std::is_same< DeviceType, Devices::Cuda >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         segmentIdx );
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         segmentIdx );
+   }
+/**
+ * \brief Returns the member `size`.
+ *
+ * NOTE(review): getSegmentsCount() returns the same member, while the
+ * class declaration documents getSize() as the number of elements managed
+ * by all segments - confirm which value is intended here.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getSize() const
+   return this->size;
+/**
+ * \brief Returns the member `storageSize`, i.e. the number of elements
+ * that need to be allocated.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getStorageSize() const
+   return this->storageSize;
+/**
+ * \brief Returns the position in the storage of element \e localIdx of the
+ * segment \e segmentIdx.
+ *
+ * Dispatches on DeviceType. For Devices::Host it calls
+ * details::ChunkedEllpack::getGlobalIndexDirect(). For Devices::Cuda both
+ * getGlobalIndexDirect() and getGlobalIndex() appear after the
+ * __CUDA_ARCH__ check.
+ * NOTE(review): the preprocessor lines separating and closing the two CUDA
+ * returns are not visible in this excerpt - confirm against the full file.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx,
+         localIdx );
+   if( std::is_same< DeviceType, Devices::Cuda >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx,
+         localIdx );
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx,
+         localIdx );
+   }
+/**
+ * \brief Returns a SegmentViewType describing the segment \e segmentIdx.
+ *
+ * Dispatches on DeviceType. For Devices::Host it calls
+ * details::ChunkedEllpack::getSegmentViewDirect(). For Devices::Cuda both
+ * getSegmentViewDirect() and getSegmentView() appear after the
+ * __CUDA_ARCH__ check.
+ * NOTE(review): the preprocessor lines separating and closing the two CUDA
+ * returns are not visible in this excerpt - confirm against the full file.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx );
+   if( std::is_same< DeviceType, Devices::Cuda >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx );
+      return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentView(
+         rowToSliceMapping,
+         slices,
+         rowToChunkMapping,
+         chunksInSlice,
+         segmentIdx );
+   }
+/**
+ * \brief Calls \e f for every element of the segments in [ first, last ).
+ *
+ * \param first index of the first segment to be traversed.
+ * \param last index one past the last segment to be traversed.
+ * \param f called as f( segmentIdx, localIdx, globalIdx, compute, args... );
+ *        its return value is not used.
+ * \param args additional arguments passed through ParallelFor to \e f.
+ *
+ * The traversal of one segment stops as soon as \e compute is false.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function, typename... Args >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+   // Local copies, so that the lambda below captures them by value
+   // instead of accessing them through `this`.
+   const IndexType chunksInSlice = this->chunksInSlice;
+   auto rowToChunkMapping = this->rowToChunkMapping;
+   auto rowToSliceMapping = this->rowToSliceMapping;
+   auto slices = this->slices;
+   auto work = [=] __cuda_callable__ ( IndexType segmentIdx, Args... args ) mutable {
+      const IndexType sliceIdx = rowToSliceMapping[ segmentIdx ];
+      // The first segment of a slice starts at chunk 0, any other segment
+      // starts where the previous one ends.
+      IndexType firstChunkOfSegment( 0 );
+      if( segmentIdx != slices[ sliceIdx ].firstSegment )
+      {
+         firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];
+      }
+      const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
+      const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
+      const IndexType sliceOffset = slices[ sliceIdx ].pointer;
+      const IndexType chunkSize = slices[ sliceIdx ].chunkSize;
+      const IndexType segmentSize = segmentChunksCount * chunkSize;
+      bool compute( true );
+      if( RowMajorOrder )
+      {
+         // All elements of the segment are stored contiguously.
+         IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize;
+         IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType j = begin; j < end && compute; j++ )
+            f( segmentIdx, localIdx++, j, compute, args...);
+      }
+      else
+      {
+         // Elements of one chunk are chunksInSlice positions apart;
+         // localIdx keeps growing across the chunks of the segment.
+         IndexType localIdx( 0 );
+         for( IndexType chunkIdx = 0; chunkIdx < segmentChunksCount; chunkIdx++ )
+         {
+            IndexType begin = sliceOffset + firstChunkOfSegment + chunkIdx;
+            IndexType end = begin + chunksInSlice * chunkSize;
+            for( IndexType j = begin; j < end && compute; j += chunksInSlice )
+            {
+               f( segmentIdx, localIdx++, j, compute, args...);
+            }
+         }
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last , work, args... );
+/**
+ * \brief Calls forSegments() for the range [ 0, getSegmentsCount() ).
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function, typename... Args >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+forAll( Function& f, Args... args ) const
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+/**
+ * \brief Performs a reduction within each segment in [ first, last ).
+ *
+ * \param first index of the first segment to be reduced.
+ * \param last index one past the last segment to be reduced.
+ * \param fetch called as fetch( segmentIdx, localIdx, globalIdx, compute, args... );
+ *        its result type defines the type of the accumulator.
+ * \param reduction called as reduction( accumulator, fetchedValue ).
+ * \param keeper called as keeper( segmentIdx, result ) once per segment.
+ * \param zero initial value of the accumulator.
+ * \param args additional arguments passed to \e fetch.
+ *
+ * On the host the segments are processed sequentially. On CUDA one block
+ * with chunksInSlice threads is launched per slice, in as many grids as
+ * are needed to cover all slices.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      //segmentsReductionKernel( 0, first, last, fetch, reduction, keeper, zero, args... );
+      //return;
+      for( IndexType segmentIdx = first; segmentIdx < last; segmentIdx++ )
+      {
+         const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
+         TNL_ASSERT_LE( sliceIndex, this->size, "" );
+         // The first segment of a slice starts at chunk 0, any other
+         // segment starts where the previous one ends.
+         IndexType firstChunkOfSegment( 0 );
+         if( segmentIdx != slices[ sliceIndex ].firstSegment )
+            firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];
+         const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
+         const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
+         const IndexType sliceOffset = slices[ sliceIndex ].pointer;
+         const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
+         const IndexType segmentSize = segmentChunksCount * chunkSize;
+         RealType aux( zero );
+         bool compute( true );
+         if( RowMajorOrder )
+         {
+            IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize;
+            IndexType end = begin + segmentSize;
+            IndexType localIdx( 0 );
+            for( IndexType j = begin; j < end && compute; j++ )
+               reduction( aux, fetch( segmentIdx, localIdx++, j, compute, args...) );
+         }
+         else
+         {
+            // localIdx is the index within the whole segment, so it must not
+            // be reset for each chunk (same as in forSegments and in the
+            // CUDA kernel).
+            IndexType localIdx( 0 );
+            for( IndexType chunkIdx = 0; chunkIdx < segmentChunksCount; chunkIdx++ )
+            {
+               IndexType begin = sliceOffset + firstChunkOfSegment + chunkIdx;
+               IndexType end = begin + chunksInSlice * chunkSize;
+               for( IndexType j = begin; j < end && compute; j += chunksInSlice )
+                  reduction( aux, fetch( segmentIdx, localIdx++, j, compute, args...) );
+            }
+         }
+         keeper( segmentIdx, aux );
+      }
+   }
+   if( std::is_same< DeviceType, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      //const IndexType chunksCount = this->numberOfSlices * this->chunksInSlice;
+      // TODO: This ignores parameters first and last
+      const IndexType cudaBlocks = this->numberOfSlices;
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      // Every grid but the last one is full. A default-constructed dim3 would
+      // have x == 1 and only one slice per grid would be processed.
+      dim3 cudaBlockSize( this->chunksInSlice ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         // The last grid gets the remaining blocks. The subtraction (unlike
+         // the modulo) gives a full grid, not an empty one, when the number
+         // of blocks is a multiple of the maximal grid size.
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks - gridIdx * Cuda::getMaxGridSize();
+         ChunkedEllpackSegmentsReductionKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, Args...  >
+            <<< cudaGridSize, cudaBlockSize, sharedMemory  >>>
+            ( *this, gridIdx, first, last, fetch, reduction, keeper, zero, args... );
+      }
+   }
+/**
+ * \brief Calls segmentsReduction() for the range [ 0, getSegmentsCount() ).
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+/**
+ * \brief Makes this view refer to the same segments as \e view.
+ *
+ * The scalar parameters are copied and each view member is rebound with
+ * bind() to the corresponding member of \e view.
+ * \return reference to this view.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >&
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+operator=( const ChunkedEllpackView& view )
+   this->size = view.size;
+   this->storageSize = view.storageSize;
+   this->chunksInSlice = view.chunksInSlice;
+   this->desiredChunkSize = view.desiredChunkSize;
+   this->rowToChunkMapping.bind( view.rowToChunkMapping );
+   this->chunksToSegmentsMapping.bind( view.chunksToSegmentsMapping );
+   this->rowToSliceMapping.bind( view.rowToSliceMapping );
+   this->rowPointers.bind( view.rowPointers );
+   this->slices.bind( view.slices );
+   this->numberOfSlices = view.numberOfSlices;
+   return *this;
+/**
+ * \brief Writes the segments into \e file.
+ *
+ * Order of the stored items: size, storageSize, chunksInSlice,
+ * desiredChunkSize, rowToChunkMapping, chunksToSegmentsMapping,
+ * rowToSliceMapping, rowPointers, slices, numberOfSlices.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+save( File& file ) const
+   file.save( &this->size );
+   file.save( &this->storageSize );
+   file.save( &this->chunksInSlice );
+   file.save( &this->desiredChunkSize );
+   file << this->rowToChunkMapping
+        << this->chunksToSegmentsMapping
+        << this->rowToSliceMapping
+        << this->rowPointers
+        << this->slices;
+   file.save( &this->numberOfSlices );
+/**
+ * \brief Reads the segments from \e file.
+ *
+ * Order of the loaded items: size, storageSize, chunksInSlice,
+ * desiredChunkSize, rowToChunkMapping, chunksToSegmentsMapping,
+ * rowToSliceMapping, rowPointers, slices, numberOfSlices.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+load( File& file )
+   file.load( &this->size );
+   file.load( &this->storageSize );
+   file.load( &this->chunksInSlice );
+   file.load( &this->desiredChunkSize );
+   file >> this->rowToChunkMapping
+        >> this->chunksToSegmentsMapping
+        >> this->rowToSliceMapping
+        >> this->rowPointers
+        >> this->slices;
+   file.load( &this->numberOfSlices );
+/**
+ * \brief Prints a human readable description of the segments to \e str.
+ *
+ * First the number of segments and slices, then one line per slice (size,
+ * chunkSize, firstSegment, pointer) and finally one line per segment with
+ * its entries of rowToSliceMapping and rowToChunkMapping. The arrays are
+ * read with getElement().
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+printStructure( std::ostream& str ) const
+   //const IndexType numberOfSlices = this->getNumberOfSlices();
+   str << "Segments count: " << this->getSize() << std::endl
+       << "Slices: " << numberOfSlices << std::endl;
+   for( IndexType i = 0; i < numberOfSlices; i++ )
+      str << "   Slice " << i
+          << " : size = " << this->slices.getElement( i ).size
+          << " chunkSize = " << this->slices.getElement( i ).chunkSize
+          << " firstSegment = " << this->slices.getElement( i ).firstSegment
+          << " pointer = " << this->slices.getElement( i ).pointer << std::endl;
+   for( IndexType i = 0; i < this->getSize(); i++ )
+      str << "Segment " << i
+          << " : slice = " << this->rowToSliceMapping.getElement( i )
+          << " chunk = " << this->rowToChunkMapping.getElement( i ) << std::endl;
+#ifdef HAVE_CUDA
+/**
+ * \brief Device-side body of the segments reduction; one block handles one
+ * slice and one thread handles one chunk of that slice.
+ *
+ * Phase 1: each thread reduces the elements of its chunk into
+ * chunksResults[ threadIdx.x ] in shared memory.
+ * Phase 2: the first sliceInfo.size threads each reduce the chunk results
+ * of one segment of the slice and pass the result to \e keeper.
+ *
+ * \param gridIdx index of the grid; together with blockIdx.x it gives the slice index.
+ * \param first, last range of segments requested by the caller.
+ * \param fetch called as fetch( segmentIdx, localIdx, globalIdx, compute, args... ).
+ * \param reduction called as reduction( accumulator, value ).
+ * \param keeper called as keeper( segmentIdx, result ).
+ * \param zero initial value of the accumulators.
+ *
+ * NOTE(review): firstSlice is computed but never used. lastSlice is derived
+ * from rowToChunkMapping, which elsewhere in this file holds chunk indexes
+ * relative to the slice, divided by chunksInSlice - confirm that this gives
+ * the intended slice bound.
+ */
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch,
+             typename Reduction,
+             typename ResultKeeper,
+             typename Real,
+             typename... Args >
+ChunkedEllpackView< Device, Index, RowMajorOrder >::
+segmentsReductionKernel( IndexType gridIdx,
+                         IndexType first,
+                         IndexType last,
+                         Fetch fetch,
+                         Reduction reduction,
+                         ResultKeeper keeper,
+                         Real zero,
+                         Args... args ) const
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   const IndexType firstSlice = rowToChunkMapping[ first ] / chunksInSlice;
+   const IndexType lastSlice = rowToChunkMapping[ last - 1 ] / chunksInSlice;
+   //for( IndexType sliceIdx = firstSlice; sliceIdx < lastSlice; sliceIdx++ )
+   {
+      const IndexType sliceIdx = gridIdx * Cuda::getMaxGridSize() + blockIdx.x;
+      // The condition depends only on the block, so the whole block leaves
+      // together before any __syncthreads().
+      if( sliceIdx >= lastSlice )
+         return;
+      // One partial result per chunk (= per thread) of the slice.
+      RealType* chunksResults = Cuda::getSharedMemory< RealType >();
+      //for( IndexType threadIdx = 0; threadIdx < 256; threadIdx++ )
+      //{
+         __shared__ details::ChunkedEllpackSliceInfo< IndexType > sliceInfo;
+         // Thread 0 loads the slice description for the whole block.
+         if( threadIdx.x == 0 )
+            sliceInfo = this->slices[ sliceIdx ];
+         chunksResults[ threadIdx.x ] = zero;
+         __syncthreads();
+         const IndexType sliceOffset = sliceInfo.pointer;
+         const IndexType chunkSize = sliceInfo.chunkSize;
+         const IndexType chunkIdx = sliceIdx * chunksInSlice + threadIdx.x;
+         const IndexType segmentIdx = this->chunksToSegmentsMapping[ chunkIdx ];
+         IndexType firstChunkOfSegment( 0 );
+         if( segmentIdx != sliceInfo.firstSegment )
+            firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];
+         // Local index of the first element of this chunk within its segment.
+         IndexType localIdx = ( threadIdx.x - firstChunkOfSegment ) * chunkSize;
+         bool compute( true );
+         if( RowMajorOrder )
+         {
+            IndexType begin = sliceOffset + threadIdx.x * chunkSize; // threadIdx.x = chunkIdx within the slice
+            IndexType end = begin + chunkSize;
+            for( IndexType j = begin; j < end && compute; j++ )
+               reduction( chunksResults[ threadIdx.x ], fetch( segmentIdx, localIdx++, j, compute, args...) );
+         }
+         else
+         {
+            const IndexType begin = sliceOffset + threadIdx.x; // threadIdx.x = chunkIdx within the slice
+            const IndexType end = begin + chunksInSlice * chunkSize;
+               for( IndexType j = begin; j < end && compute; j += chunksInSlice )
+                  reduction( chunksResults[ threadIdx.x ], fetch( segmentIdx, localIdx++, j, compute, args...) );
+         }
+         // All chunk results must be complete before they are combined below.
+         __syncthreads();
+      //}
+      //details::ChunkedEllpackSliceInfo< IndexType > sliceInfo;
+      //for( IndexType threadIdx = 0; threadIdx < 256; threadIdx++ )
+      //{
+         //if( threadIdx == 0 )
+         //   sliceInfo = this->slices[ sliceIdx ];
+         // Thread t combines the chunk results of the t-th segment of the slice.
+         if( threadIdx.x < sliceInfo.size )
+         {
+            const IndexType row = sliceInfo.firstSegment + threadIdx.x;
+            IndexType chunkIndex( 0 );
+            if( threadIdx.x != 0 )
+               chunkIndex = this->rowToChunkMapping[ row - 1 ];
+            const IndexType lastChunk = this->rowToChunkMapping[ row ];
+            RealType result( zero );
+            while( chunkIndex < lastChunk )
+               reduction( result,  chunksResults[ chunkIndex++ ] );
+            keeper( row, result );
+         }
+      //} // threadIdx
+   } // sliceIdx
+      } // namespace Segments
+   }  // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/ChunkedEllpack.h b/src/TNL/Containers/Segments/details/ChunkedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..8807de226cf2d7648a95cc2d788b27a5775767ac
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/ChunkedEllpack.h
@@ -0,0 +1,229 @@
+                          ChunkedEllpack.h -  description
+                             -------------------
+    begin                : Mar 25, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <type_traits>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/ChunkedEllpackSegmentView.h>
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+         namespace details {
+ * In ChunkedEllpack, the segments are split into slices. This is done
+ * in ChunkedEllpack::resolveSliceSizes. The elements of all segments in each slice
+ * are split into chunks. All chunks in one slice have the same size, but the
+ * chunk size can differ from slice to slice.
+ */
+template< typename Index >
+struct ChunkedEllpackSliceInfo
+   /**
+    * The size of the slice, i.e. the number of segments covered by
+    * the slice.
+    */
+   Index size;
+   /**
+    * The chunk size, i.e. the maximal number of non-zero elements that can be stored
+    * in one chunk.
+    */
+   Index chunkSize;
+   /**
+    * Index of the first segment covered by this slice.
+    */
+   Index firstSegment;
+   /**
+    * Position of the first element of this slice, i.e. the offset that is
+    * added to every global index computed for a segment of this slice.
+    */
+   Index pointer;
+/**
+ * Helper class with static functions that translate a segment index (and
+ * optionally a local index within the segment) into the segment size, a
+ * global storage index or a segment view of the ChunkedEllpack format.
+ *
+ * \tparam Index is the type used for indexing.
+ * \tparam Device is the device the index containers are allocated on.
+ * \tparam RowMajorOrder selects the element layout within a slice; it
+ *    defaults to true when Device is Devices::Host and to false otherwise.
+ */
+template< typename Index,
+          typename Device,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class ChunkedEllpack
+   public:
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      // Vector of indexes and its view; used for both mappings passed to the static helpers.
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using OffsetsHolderView = typename OffsetsHolder::ViewType;
+      using SegmentsSizes = OffsetsHolder;
+      // Array of per-slice descriptors (size, chunkSize, firstSegment, pointer) and its view.
+      using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
+      using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
+      using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
+      using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType;
+      using SegmentViewType = ChunkedEllpackSegmentView< IndexType >;
+      /**
+       * Returns the allocated size of segment segmentIdx, i.e. the number of
+       * chunks assigned to the segment multiplied by the chunk size of the
+       * slice the segment belongs to. All containers are read with operator[].
+       */
+      __cuda_callable__ static
+      IndexType getSegmentSizeDirect( const OffsetsHolderView& segmentsToSlicesMapping,
+                                      const ChunkedEllpackSliceInfoContainerView& slices,
+                                      const OffsetsHolderView& segmentsToChunksMapping,
+                                      const IndexType segmentIdx )
+      {
+         const auto& slice = slices[ segmentsToSlicesMapping[ segmentIdx ] ];
+         // segmentsToChunksMapping holds, for each segment, the index one past its
+         // last chunk; the first segment of a slice starts at chunk 0.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping[ segmentIdx - 1 ];
+         const IndexType chunksEnd = segmentsToChunksMapping[ segmentIdx ];
+         return slice.chunkSize * ( chunksEnd - chunksBegin );
+      }
+      /**
+       * Counterpart of getSegmentSizeDirect that is not marked __cuda_callable__
+       * and reads the containers through getElement() instead of operator[].
+       * Returns the number of chunks of segment segmentIdx multiplied by the
+       * chunk size of its slice.
+       */
+      static
+      IndexType getSegmentSize( const OffsetsHolderView& segmentsToSlicesMapping,
+                                const ChunkedEllpackSliceInfoContainerView& slices,
+                                const OffsetsHolderView& segmentsToChunksMapping,
+                                const IndexType segmentIdx )
+      {
+         const auto slice = slices.getElement( segmentsToSlicesMapping.getElement( segmentIdx ) );
+         // The first segment of a slice starts at chunk 0, any other segment
+         // starts where the previous one ends.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping.getElement( segmentIdx - 1 );
+         const IndexType chunksEnd = segmentsToChunksMapping.getElement( segmentIdx );
+         return slice.chunkSize * ( chunksEnd - chunksBegin );
+      }
+      /**
+       * Returns the global storage index of the element localIdx of segment
+       * segmentIdx. All containers are read with operator[].
+       *
+       * chunksInSlice is the stride between consecutive elements of one chunk
+       * when RowMajorOrder is false; it is unused when RowMajorOrder is true.
+       */
+      __cuda_callable__ static
+      IndexType getGlobalIndexDirect( const OffsetsHolderView& segmentsToSlicesMapping,
+                                      const ChunkedEllpackSliceInfoContainerView& slices,
+                                      const OffsetsHolderView& segmentsToChunksMapping,
+                                      const IndexType chunksInSlice,
+                                      const IndexType segmentIdx,
+                                      const IndexType localIdx )
+      {
+         const auto& slice = slices[ segmentsToSlicesMapping[ segmentIdx ] ];
+         // The first segment of a slice starts at chunk 0, any other segment
+         // starts where the previous one ends.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping[ segmentIdx - 1 ];
+         const IndexType segmentChunksCount = segmentsToChunksMapping[ segmentIdx ] - chunksBegin;
+         const IndexType chunkSize = slice.chunkSize;
+         // NOTE(review): the bound admits localIdx equal to the segment size;
+         // confirm whether a strict comparison was intended.
+         TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" );
+         if( ! RowMajorOrder )
+         {
+            // Position inside the chunk selects the stride-chunksInSlice row,
+            // the chunk number selects the position within that row.
+            return slice.pointer + ( localIdx % chunkSize ) * chunksInSlice + chunksBegin + localIdx / chunkSize;
+         }
+         // Chunks of the segment are stored one after another.
+         return slice.pointer + chunksBegin * chunkSize + localIdx;
+      }
+      /**
+       * Counterpart of getGlobalIndexDirect that is not marked __cuda_callable__
+       * and reads the containers through getElement() instead of operator[].
+       * Returns the global storage index of the element localIdx of segment
+       * segmentIdx.
+       */
+      static
+      IndexType getGlobalIndex( const OffsetsHolderView& segmentsToSlicesMapping,
+                                const ChunkedEllpackSliceInfoContainerView& slices,
+                                const OffsetsHolderView& segmentsToChunksMapping,
+                                const IndexType chunksInSlice,
+                                const IndexType segmentIdx,
+                                const IndexType localIdx )
+      {
+         const auto slice = slices.getElement( segmentsToSlicesMapping.getElement( segmentIdx ) );
+         // The first segment of a slice starts at chunk 0, any other segment
+         // starts where the previous one ends.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping.getElement( segmentIdx - 1 );
+         const IndexType segmentChunksCount = segmentsToChunksMapping.getElement( segmentIdx ) - chunksBegin;
+         const IndexType chunkSize = slice.chunkSize;
+         // NOTE(review): the bound admits localIdx equal to the segment size;
+         // confirm whether a strict comparison was intended.
+         TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" );
+         if( ! RowMajorOrder )
+         {
+            // Position inside the chunk selects the stride-chunksInSlice row,
+            // the chunk number selects the position within that row.
+            return slice.pointer + ( localIdx % chunkSize ) * chunksInSlice + chunksBegin + localIdx / chunkSize;
+         }
+         // Chunks of the segment are stored one after another.
+         return slice.pointer + chunksBegin * chunkSize + localIdx;
+      }
+      /**
+       * Builds a view of segment segmentIdx. The view is constructed from the
+       * global offset of the segment, the segment size (number of chunks times
+       * chunk size), the chunk size and chunksInSlice. All containers are read
+       * with operator[].
+       */
+      static __cuda_callable__
+      SegmentViewType getSegmentViewDirect( const OffsetsHolderView& segmentsToSlicesMapping,
+                                            const ChunkedEllpackSliceInfoContainerView& slices,
+                                            const OffsetsHolderView& segmentsToChunksMapping,
+                                            const IndexType chunksInSlice,
+                                            const IndexType segmentIdx )
+      {
+         const auto& slice = slices[ segmentsToSlicesMapping[ segmentIdx ] ];
+         // The first segment of a slice starts at chunk 0, any other segment
+         // starts where the previous one ends.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping[ segmentIdx - 1 ];
+         const IndexType chunksCount = segmentsToChunksMapping[ segmentIdx ] - chunksBegin;
+         const IndexType chunkSize = slice.chunkSize;
+         // In row-major order the chunks are stored one after another, so the
+         // first chunk index is scaled by the chunk size; otherwise it is used as is.
+         const IndexType segmentBegin = slice.pointer + ( RowMajorOrder ? chunksBegin * chunkSize : chunksBegin );
+         return SegmentViewType( segmentBegin, chunksCount * chunkSize, chunkSize, chunksInSlice );
+      }
+      /**
+       * Counterpart of getSegmentViewDirect that reads the containers through
+       * getElement() instead of operator[]. The view is constructed from the
+       * global offset of the segment, the segment size, the chunk size and
+       * chunksInSlice.
+       *
+       * NOTE(review): this is the only getElement()-based helper of this class
+       * that is marked __cuda_callable__; confirm that this is intended.
+       */
+      static __cuda_callable__
+      SegmentViewType getSegmentView( const OffsetsHolderView& segmentsToSlicesMapping,
+                                      const ChunkedEllpackSliceInfoContainerView& slices,
+                                      const OffsetsHolderView& segmentsToChunksMapping,
+                                      const IndexType chunksInSlice,
+                                      const IndexType segmentIdx )
+      {
+         const auto slice = slices.getElement( segmentsToSlicesMapping.getElement( segmentIdx ) );
+         // The first segment of a slice starts at chunk 0, any other segment
+         // starts where the previous one ends.
+         const IndexType chunksBegin = ( segmentIdx == slice.firstSegment )
+                                       ? IndexType( 0 )
+                                       : segmentsToChunksMapping.getElement( segmentIdx - 1 );
+         const IndexType chunksCount = segmentsToChunksMapping.getElement( segmentIdx ) - chunksBegin;
+         const IndexType chunkSize = slice.chunkSize;
+         // In row-major order the chunks are stored one after another, so the
+         // first chunk index is scaled by the chunk size; otherwise it is used as is.
+         const IndexType segmentBegin = slice.pointer + ( RowMajorOrder ? chunksBegin * chunkSize : chunksBegin );
+         return SegmentViewType( segmentBegin, chunksCount * chunkSize, chunkSize, chunksInSlice );
+      }
+         } //namespace details
+      } //namespace Segments
+   } //namespace Containers
+} //namepsace TNL
diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h
deleted file mode 100644
index ecfe63107325793717482b3710c9533a153c34c1..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Segments/details/Ellpack.h
+++ /dev/null
@@ -1,105 +0,0 @@
-                          Ellpack.h -  description
-                             -------------------
-    begin                : Dec 3, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#include <TNL/Containers/Vector.h>
-namespace TNL {
-   namespace Containers {
-      namespace Segments {
-template< typename Device,
-          typename Index,
-          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
-          int Alignment = 32 >
-class Ellpack
-   public:
-      using DeviceType = Device;
-      using IndexType = Index;
-      static constexpr int getAlignment() { return Alignment; }
-      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
-      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
-      using SegmentsSizes = OffsetsHolder;
-      Ellpack();
-      Ellpack( const SegmentsSizes& sizes );
-      Ellpack( const IndexType segmentsCount, const IndexType segmentSize );
-      Ellpack( const Ellpack& segments );
-      Ellpack( const Ellpack&& segments );
-      /**
-       * \brief Set sizes of particular segments.
-       */
-      template< typename SizesHolder = OffsetsHolder >
-      void setSegmentsSizes( const SizesHolder& sizes );
-      void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize );
-      /**
-       * \brief Number segments.
-       */
-      __cuda_callable__
-      IndexType getSegmentsCount() const;
-      __cuda_callable__
-      IndexType getSegmentSize( const IndexType segmentIdx ) const;
-      __cuda_callable__
-      IndexType getSize() const;
-      __cuda_callable__
-      IndexType getStorageSize() const;
-      __cuda_callable__
-      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-      /***
-       * \brief Go over all segments and for each segment element call
-       * function 'f' with arguments 'args'. The return type of 'f' is bool.
-       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
-       * is terminated.
-       */
-      template< typename Function, typename... Args >
-      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
-      template< typename Function, typename... Args >
-      void forAll( Function& f, Args... args ) const;
-      /***
-       * \brief Go over all segments and perform a reduction in each of them.
-       */
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
-      void save( File& file ) const;
-      void load( File& file );
-   protected:
-      IndexType segmentSize, size, alignedSize;
-      } // namespace Segements
-   }  // namespace Conatiners
-} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h
deleted file mode 100644
index 6f185bc469e1c1826348b5662735d6a2992fc087..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Segments/details/SlicedEllpack.h
+++ /dev/null
@@ -1,104 +0,0 @@
-                          SlicedEllpack.h -  description
-                             -------------------
-    begin                : Dec 4, 2019
-    copyright            : (C) 2019 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#include <TNL/Containers/Vector.h>
-namespace TNL {
-   namespace Containers {
-      namespace Segments {
-template< typename Device,
-          typename Index,
-          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
-          int SliceSize = 32 >
-class SlicedEllpack
-   public:
-      using DeviceType = Device;
-      using IndexType = Index;
-      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
-      static constexpr int getSliceSize() { return SliceSize; }
-      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
-      SlicedEllpack();
-      SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
-      SlicedEllpack( const SlicedEllpack& segments );
-      SlicedEllpack( const SlicedEllpack&& segments );
-      /**
-       * \brief Set sizes of particular segments.
-       */
-      template< typename SizesHolder = OffsetsHolder >
-      void setSegmentsSizes( const SizesHolder& sizes );
-      __cuda_callable__
-      IndexType getSegmentsCount() const;
-      __cuda_callable__
-      IndexType getSegmentSize( const IndexType segmentIdx ) const;
-      /**
-       * \brief Number segments.
-       */
-      __cuda_callable__
-      IndexType getSize() const;
-      __cuda_callable__
-      IndexType getStorageSize() const;
-      __cuda_callable__
-      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-      /***
-       * \brief Go over all segments and for each segment element call
-       * function 'f' with arguments 'args'. The return type of 'f' is bool.
-       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
-       * is terminated.
-       */
-      template< typename Function, typename... Args >
-      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
-      template< typename Function, typename... Args >
-      void forAll( Function& f, Args... args ) const;
-      /***
-       * \brief Go over all segments and perform a reduction in each of them.
-       */
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
-      void save( File& file ) const;
-      void load( File& file );
-   protected:
-      IndexType size, alignedSize, segmentsCount;
-      OffsetsHolder sliceOffsets, sliceSegmentSizes;
-      } // namespace Segements
-   }  // namespace Conatiners
-} // namespace TNL
diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h
index 51ee055066fab43d3eaca7a53e5a1bc1bee2abb0..5702d9fe7375df94177f67d1e65cdaf3e65ffa4b 100644
--- a/src/TNL/Containers/StaticArray.h
+++ b/src/TNL/Containers/StaticArray.h
@@ -227,7 +227,6 @@ public:
     * \brief Sets all values of this static array to \e val.
-   [[deprecated( "Use of StaticArray::setValue is deprecated, assignment operator=() should be used instead." )]]
    void setValue( const ValueType& val );
diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h
index 259081052c1d0c1241f190cdad4002f98b3bf2d0..685a7f2068e665c7767b03350101367fa06ee123 100644
--- a/src/TNL/Containers/Vector.h
+++ b/src/TNL/Containers/Vector.h
@@ -112,6 +112,16 @@ public:
    Vector( Vector&& ) = default;
+   /**
+    * \brief Constructor from expression template
+    * 
+    * @param expression input expression template
+    */
+   template< typename VectorExpression,
+             typename...,
+             typename = std::enable_if_t< Expressions::HasEnabledExpressionTemplates< VectorExpression >::value && ! IsArrayType< VectorExpression >::value > >
+   explicit Vector( const VectorExpression& expression );
     * \brief Copy-assignment operator for copying data from another vector.
diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp
index 5fdce0d09d2adb53b7c19e971fdf3b0a545891a5..b25ccbb5ac5ef117d17c024de38f5549d77f0b9e 100644
--- a/src/TNL/Containers/Vector.hpp
+++ b/src/TNL/Containers/Vector.hpp
@@ -27,6 +27,20 @@ Vector( const Vector& vector,
+/**
+ * Constructs the vector from an expression template: the vector is first
+ * resized with detail::VectorAssignment::resize and the expression is then
+ * written into it with detail::VectorAssignment::assign.
+ */
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename Allocator >
+   template< typename VectorExpression,
+             typename...,
+             typename >
+Vector< Real, Device, Index, Allocator >::
+Vector( const VectorExpression& expression )
+   detail::VectorAssignment< Vector, VectorExpression >::resize( *this, expression );
+   detail::VectorAssignment< Vector, VectorExpression >::assign( *this, expression );
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Devices/AnyDevice.h b/src/TNL/Devices/AnyDevice.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c82847293aa14cb18334bf4d3d3e3f1ab95d2b6
--- /dev/null
+++ b/src/TNL/Devices/AnyDevice.h
@@ -0,0 +1,35 @@
+                          AnyDevice.h  -  description
+                             -------------------
+    begin                : Mar 17, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Devices/Host.h>
+namespace TNL {
+namespace Devices {
+// Placeholder device type; PickDevice resolves it to Devices::Host.
+class AnyDevice
+// Maps a device type to the device that is actually used; in the general
+// case this is the given device itself.
+template< typename Device >
+struct PickDevice
+   using DeviceType = Device;
+// Specialization for Devices::AnyDevice, which resolves to Devices::Host.
+struct PickDevice< Devices::AnyDevice >
+   using DeviceType = Devices::Host;
+} // namespace Devices
+} // namespace TNL
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index ada48ee0297438c717772433fb6a09972f2d49e8..6a4795a7e748a26f32536f912f03886e3305bd9a 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -57,6 +57,8 @@ class Dense : public Matrix< Real, Device, Index >
       Dense( const IndexType rows, const IndexType columns );
+      /**
+       * \brief Constructs the matrix from a 2D initializer list.
+       *
+       * Each inner list holds the elements of one matrix row. The work is
+       * delegated to setElements().
+       */
+      Dense( std::initializer_list< std::initializer_list< RealType > > data );
       ViewType getView();
       ConstViewType getConstView() const;
@@ -71,7 +73,16 @@ class Dense : public Matrix< Real, Device, Index >
       template< typename Matrix >
       void setLike( const Matrix& matrix );
-      /****
+      /**
+       * \brief This method creates a dense matrix from a 2D initializer list.
+       *
+       * The matrix dimensions are set from the input data: the number of rows
+       * is the number of inner lists and the number of columns is the length
+       * of the longest inner list.
+       *
+       * NOTE(review): elements of shorter rows that are not listed in \e data
+       * are not assigned by this method; their value depends on what
+       * setDimensions() does - confirm.
+       *
+       * \param data is the 2D initializer list; each inner list holds the
+       *    elements of one matrix row.
+       */
+      void setElements( std::initializer_list< std::initializer_list< RealType > > data );
+      /**
        * This method is only for the compatibility with the sparse matrices.
       void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 346c26ed8a628d5737e3dce143340a42364e5a9a..28f152444aeeedd8625055a59445e82530c7d7fb 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -37,6 +37,57 @@ Dense( const IndexType rows, const IndexType columns )
    this->setDimensions( rows, columns );
+/**
+ * Constructor from a 2D initializer list; it only forwards the data to
+ * setElements(), which also sets the matrix dimensions.
+ */
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Dense( std::initializer_list< std::initializer_list< RealType > > data )
+   this->setElements( data );
+/**
+ * Sets the matrix dimensions and elements from a 2D initializer list.
+ *
+ * The number of rows is the number of inner lists, the number of columns is
+ * the length of the longest inner list. Only the listed elements are written.
+ */
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setElements( std::initializer_list< std::initializer_list< RealType > > data )
+   IndexType rows = data.size();
+   IndexType columns = 0;
+   // The inner lists may differ in length; the longest one gives the column count.
+   for( auto row : data )
+      columns = max( columns, row.size() );
+   this->setDimensions( rows, columns );
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+      // Not a host matrix: fill a host matrix of the same dimensions first and
+      // copy it to this matrix with one assignment (presumably to avoid
+      // element-wise transfers to the device - confirm).
+      Dense< RealType, Devices::Host, IndexType > hostDense( rows, columns );
+      IndexType rowIdx( 0 );
+      for( auto row : data )
+      {
+         IndexType columnIdx( 0 );
+         for( auto element : row )
+            hostDense.setElement( rowIdx, columnIdx++, element );
+         rowIdx++;
+      }
+      *this = hostDense;
+   }
+   else
+   {
+      // Host matrix: write the listed elements directly into this matrix.
+      IndexType rowIdx( 0 );
+      for( auto row : data )
+      {
+         IndexType columnIdx( 0 );
+         for( auto element : row )
+            this->setElement( rowIdx, columnIdx++, element );
+         rowIdx++;
+      }
+   }
 template< typename Real,
           typename Device,
           typename Index,
@@ -48,8 +99,7 @@ getView() -> ViewType
    return ViewType( this->getRows(),
-                    this->getValues().getView(),
-                    this->segments.getView() );
+                    this->getValues().getView() );
 template< typename Real,
@@ -63,8 +113,7 @@ getConstView() const -> ConstViewType
    return ConstViewType( this->getRows(),
-                         this->getValues().getConstView(),
-                         this->segments.getConstView() );
+                         this->getValues().getConstView() );
 template< typename Real,
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 95a7c47698fc27f7fa760a64c0176a147ebe391c..a7e1a09a78f336b94ae9f62ab84d2846d2e24602 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -64,8 +64,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       DenseMatrixView( const IndexType rows,
                        const IndexType columns,
-                       const ValuesViewType& values,
-                       const SegmentsViewType& segments );
+                       const ValuesViewType& values );
       DenseMatrixView( const DenseMatrixView& m ) = default;
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 01415ec21c2446a255db57aae7df04cbe5813ed8..ddd9c93281b70a10d25e05768d409f41303e4774 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -35,10 +35,11 @@ __cuda_callable__
 DenseMatrixView< Real, Device, Index, RowMajorOrder >::
 DenseMatrixView( const IndexType rows,
                  const IndexType columns,
-                 const ValuesViewType& values,
-                 const SegmentsViewType& segments )
- : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments )
+                 const ValuesViewType& values )
+ : MatrixView< Real, Device, Index >( rows, columns, values )
+   SegmentsType a( rows, columns );
+   segments = a.getView();
 template< typename Real,
@@ -53,8 +54,7 @@ getView() -> ViewType
    return ViewType( this->getRows(),
-                    this->columnIndexes.getView(),
-                    this->segments.getView() );
+                    this->columnIndexes.getView() );
 template< typename Real,
@@ -69,8 +69,7 @@ getConstView() const -> ConstViewType
    return ConstViewType( this->getRows(),
-                         this->getColumnsIndexes().getConstView(),
-                         this->segments.getConstView() );
+                         this->getColumnsIndexes().getConstView() );
 template< typename Real,
diff --git a/src/TNL/Matrices/LambdaMatrix.h b/src/TNL/Matrices/LambdaMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f3ecdfb36bc447f44578c4c0493ae635b1e7cd4
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrix.h
@@ -0,0 +1,164 @@
+                          LambdaMatrix.h -  description
+                             -------------------
+    begin                : Mar 17, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/String.h>
+#include <TNL/Devices/AnyDevice.h>
+namespace TNL {
+namespace Matrices {
+ * \brief "Matrix-free" matrix based on lambda functions.
+ * 
+ * \tparam MatrixElementsLambda is a lambda function returning matrix elements
+ *    values and positions.
+ * \tparam CompressedRowLengthsLambda is a lambda function returning a number
+ *    of non-zero elements in each row.
+ * \tparam Real is a type of matrix elements values.
+ * \tparam Device is a device on which the lambda functions can be evaluated.
+ *    Devices::AnyDevice can be used for lambdas with no restriction.
+ * \tparam Index is a type used for indexing.
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real = double,
+          typename Device = Devices::AnyDevice,
+          typename Index = int >
+class LambdaMatrix
+   public:
+      static constexpr bool isSymmetric() { return false; };
+      static constexpr bool isBinary() { return false; };
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      /**
+       * \brief Constructor without matrix dimensions.
+       *
+       * Both lambdas are copied into the matrix.
+       */
+      LambdaMatrix( MatrixElementsLambda& matrixElements,
+                    CompressedRowLengthsLambda& compressedRowLentghs );
+      /**
+       * \brief Constructor with matrix dimensions.
+       *
+       * Both lambdas are copied into the matrix.
+       */
+      LambdaMatrix( const IndexType& rows,
+                    const IndexType& columns,
+                    MatrixElementsLambda& matrixElements,
+                    CompressedRowLengthsLambda& compressedRowLentghs );
+      void setDimensions( const IndexType& rows,
+                         const IndexType& columns );
+      __cuda_callable__
+      IndexType getRows() const;
+      __cuda_callable__
+      IndexType getColumns() const;
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+            template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+      /***
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector
+       *
+       * NOTE(review): the formula is restated with the declared parameter
+       * names; confirm it against the implementation in LambdaMatrix.hpp.
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& outVectorMultiplicator = 0.0 ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+      void print( std::ostream& str ) const;
+   protected:
+      // Matrix dimensions.
+      IndexType rows, columns;
+      // Copies of the lambdas passed to the constructor.
+      MatrixElementsLambda matrixElementsLambda;
+      CompressedRowLengthsLambda compressedRowLengthsLambda;
+ * \brief Helper class for creating instances of LambdaMatrix.
+ * @param matrixElementsLambda
+ * @param compressedRowLengthsLambda
+ * @return 
+ */
+template< typename Real = double,
+          typename Device = Devices::AnyDevice,
+          typename Index = int >
+struct LambdaMatrixFactory
+   using RealType = Real;
+   using IndexType = Index;
+   /**
+    * Creates a LambdaMatrix from the given lambdas without passing matrix
+    * dimensions; the lambda types are deduced from the arguments.
+    */
+   template< typename MatrixElementsLambda,
+             typename CompressedRowLengthsLambda >
+   static auto create( MatrixElementsLambda& matrixElementsLambda,
+                       CompressedRowLengthsLambda& compressedRowLengthsLambda )
+   -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >
+   {
+      using MatrixType = LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >;
+      return MatrixType( matrixElementsLambda, compressedRowLengthsLambda );
+   };
+   /**
+    * Creates a LambdaMatrix with the given dimensions from the given lambdas;
+    * the lambda types are deduced from the arguments.
+    *
+    * \param rows is the number of matrix rows.
+    * \param columns is the number of matrix columns.
+    * \param matrixElementsLambda is the lambda returning matrix elements.
+    * \param compressedRowLengthsLambda is the lambda returning row lengths.
+    * \return the matrix constructed with the given dimensions and lambdas.
+    */
+   template< typename MatrixElementsLambda,
+             typename CompressedRowLengthsLambda >
+   static auto create( const IndexType& rows,
+                       const IndexType& columns,
+                       MatrixElementsLambda& matrixElementsLambda,
+                       CompressedRowLengthsLambda& compressedRowLengthsLambda )
+   -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >
+   {
+      // The dimensions must be forwarded: the two-argument constructor would
+      // leave the matrix with zero rows and columns.
+      return LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >(
+         rows,
+         columns,
+         matrixElementsLambda,
+         compressedRowLengthsLambda );
+   };
+} //namespace Matrices
+} //namespace TNL
+#include <TNL/Matrices/LambdaMatrix.hpp>
diff --git a/src/TNL/Matrices/LambdaMatrix.hpp b/src/TNL/Matrices/LambdaMatrix.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c992bd575bd7f2e09bbd50ded60191a90ceca297
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrix.hpp
@@ -0,0 +1,359 @@
+                          LambdaMatrix.hpp -  description
+                             -------------------
+    begin                : Mar 17, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+namespace TNL {
+namespace Matrices {
+// Constructor taking only the two lambda functions.
+// The number of rows and columns is initialized to zero and both lambdas
+// are copied into the matrix members.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+LambdaMatrix( MatrixElementsLambda& matrixElements,
+              CompressedRowLengthsLambda& compressedRowLengths )
+: rows( 0 ), columns( 0 ), matrixElementsLambda( matrixElements ), compressedRowLengthsLambda( compressedRowLengths )
+// Constructor taking the matrix dimensions and the two lambda functions.
+// The dimensions are stored as given and both lambdas are copied into the
+// matrix members.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+LambdaMatrix( const IndexType& rows,
+              const IndexType& columns,
+              MatrixElementsLambda& matrixElements,
+              CompressedRowLengthsLambda& compressedRowLengths )
+: rows( rows ), columns( columns ), matrixElementsLambda( matrixElements ), compressedRowLengthsLambda( compressedRowLengths )
+// Sets the number of rows and columns of the matrix.
+// Only the two stored dimensions are overwritten; the lambdas are untouched.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+setDimensions( const IndexType& rows,
+               const IndexType& columns )
+   this->rows = rows;
+   this->columns = columns;
+// Returns the stored number of matrix rows.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getRows() const
+   return this->rows;
+// Returns the stored number of matrix columns.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getColumns() const
+   return this->columns;
+// Fills rowLengths with the value returned by the row-lengths lambda for
+// every matrix row. The vector is resized to the number of matrix rows.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Vector >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getCompressedRowLengths( Vector& rowLengths ) const
+   // Device on which the lambda is evaluated, derived from the matrix DeviceType.
+   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
+   rowLengths.setSize( this->getRows() );
+   // Local copies so that the [=] lambdas below capture plain values.
+   const IndexType rows = this->getRows();
+   const IndexType columns = this->getColumns();
+   auto rowLengthsView = rowLengths.getView();
+   auto compressedRowLengths = this->compressedRowLengthsLambda;
+   // The output vector has the same device type as the evaluation device:
+   // write the results directly through its view.
+   if( std::is_same< typename Vector::DeviceType, Device_ >::value )
+      Algorithms::ParallelFor< Device_ >::exec(
+         ( IndexType ) 0,
+         this->getRows(),
+         [=] __cuda_callable__ ( const IndexType row ) mutable {
+            rowLengthsView[ row ] = compressedRowLengths( rows, columns, row );
+         } );
+   else
+   {
+      // Device types differ: compute into an auxiliary vector on Device_
+      // and transfer the result by vector assignment.
+      Containers::Vector< IndexType, Device_, IndexType > aux( this->getRows() );
+      auto auxView = aux.getView();
+      Algorithms::ParallelFor< Device_ >::exec(
+         ( IndexType ) 0,
+         this->getRows(),
+         [=] __cuda_callable__ ( const IndexType row ) mutable {
+            auxView[ row ] = compressedRowLengths( rows, columns, row );
+         } );
+      rowLengths = aux;
+   }
+// Returns the sum of the row lengths reported by the row-lengths lambda.
+// NOTE(review): only the lengths are summed, the element values are not
+// inspected, so elements whose value is zero are counted as well.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getNumberOfNonzeroMatrixElements() const
+   Containers::Vector< IndexType, typename Devices::PickDevice< DeviceType >::DeviceType, IndexType > rowLengthsVector;
+   this->getCompressedRowLengths( rowLengthsVector );
+   return sum( rowLengthsVector );
+// Returns the value of the matrix element at position ( row, column ).
+// The elements of the given row are generated one by one by the
+// matrix-elements lambda; the value of the first one whose column index
+// equals `column` is returned, or zero when there is none.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getElement( const IndexType row,
+            const IndexType column ) const
+   // NOTE(review): the device is picked from Devices::Host here, whereas the
+   // other methods pick it from DeviceType - confirm this is intended.
+   using Device_ = typename Devices::PickDevice< Devices::Host >::DeviceType;
+   // One-element array serving as storage for the result of the lambda below.
+   Containers::Array< RealType, Device_ > value( 1 );
+   auto valueView = value.getView();
+   auto rowLengths = this->compressedRowLengthsLambda;
+   auto matrixElements = this->matrixElementsLambda;
+   const IndexType rows = this->getRows();
+   const IndexType columns = this->getColumns();
+   // The parameter rowIdx is not used; the body reads the captured `row`.
+   // Both are equal because the lambda is executed for the single index `row`.
+   auto getValue = [=] __cuda_callable__ (  IndexType rowIdx ) mutable {
+      const IndexType rowSize = rowLengths( rows, columns, row );
+      valueView[ 0 ] = 0.0;
+      for( IndexType localIdx = 0; localIdx < rowSize; localIdx++ )
+      {
+         RealType elementValue;
+         IndexType elementColumn;
+         matrixElements( rows, columns, row, localIdx, elementColumn, elementValue );
+         if( elementColumn == column )
+         {
+            valueView[ 0 ] = elementValue;
+            break;
+         }
+      }
+   };
+   Algorithms::ParallelFor< Device_ >::exec( row, row + 1, getValue );
+   return valueView.getElement( 0 );
+// Product of one matrix row with a vector.
+// NOTE(review): no statements of the body are visible in this diff. If the
+// body is really empty, the function ends without returning a value although
+// its return type is Vector::RealType - confirm and implement or remove.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+      template< typename Vector >
+typename Vector::RealType
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+rowVectorProduct( const IndexType row,
+                  const Vector& vector ) const
+// Computes the matrix-vector product
+//    outVector = matrixMultiplicator * ( A * inVector ) + outVectorMultiplicator * outVector
+// by means of allRowsReduction. When outVectorMultiplicator is zero, the
+// previous content of outVector is not read.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename InVector,
+             typename OutVector >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType& matrixMultiplicator,
+               const RealType& outVectorMultiplicator ) const
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   // fetch: contribution of one matrix element to the row sum; the input
+   // vector is not read for elements with zero value.
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType {
+      if( value == 0.0 )
+         return 0.0;
+      return value * inVectorView[ columnIdx ];
+   };
+   // reduce: accumulates the contributions of one row.
+   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   // keep: stores the accumulated row result into the output vector.
+   auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      if( outVectorMultiplicator == 0.0 )
+         outVectorView[ row ] = matrixMultiplicator * value;
+      else
+         outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0.0 );
+// Performs a reduction over the elements of each row in the index range
+// given by first and last (passed as is to ParallelFor).
+//
+// For every processed row:
+//   - the accumulator is initialized with `zero`,
+//   - each element generated by the matrix-elements lambda is passed to
+//     fetch( rowIdx, localIdx, column, value ) unless its value is zero, in
+//     which case `zero` is used instead of calling fetch,
+//   - reduce( accumulator, fetched ) merges the fetched value,
+//   - keep( rowIdx, accumulator ) receives the final result of the row.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+   // Type of the accumulator is the return type of fetch.
+   using FetchType = decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) );
+   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
+   // Local copies which are captured by value in the lambda below.
+   const IndexType rows = this->getRows();
+   const IndexType columns = this->getColumns();
+   auto rowLengths = this->compressedRowLengthsLambda;
+   auto matrixElements = this->matrixElementsLambda;
+   auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      const IndexType rowLength = rowLengths( rows, columns, rowIdx );
+      FetchType result( zero );
+      for( IndexType localIdx = 0; localIdx < rowLength; localIdx++ )
+      {
+        IndexType elementColumn( 0 );
+        RealType elementValue( 0.0 );
+        matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue );
+        FetchType fetchValue( zero );
+        if( elementValue != 0.0 )
+            fetchValue = fetch( rowIdx, localIdx, elementColumn, elementValue );
+        reduce( result, fetchValue );
+      }
+      keep( rowIdx, result );
+   };
+   Algorithms::ParallelFor< Device_ >::exec( first, last, processRow );
+// Performs rowsReduction with the row range 0 .. getRows(), i.e. the same
+// fetch/reduce/keep/zero arguments are forwarded unchanged.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+// Calls `function` for the matrix elements of the rows in the index range
+// given by first and last (passed as is to ParallelFor).
+//
+// For every element generated by the matrix-elements lambda whose value is
+// not zero, the call is
+//    function( rowIdx, localIdx, columnIdx, value, compute )
+// where compute is a bool initialized to true for each row. The traversal of
+// a row stops as soon as compute is false, so a function taking compute by
+// reference can terminate the row early.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Function >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+forRows( IndexType first, IndexType last, Function& function ) const
+   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
+   // Local copies which are captured by value in the lambda below.
+   const IndexType rows = this->getRows();
+   const IndexType columns = this->getColumns();
+   auto rowLengths = this->compressedRowLengthsLambda;
+   auto matrixElements = this->matrixElementsLambda;
+   auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      const IndexType rowLength = rowLengths( rows, columns, rowIdx );
+      bool compute( true );
+      for( IndexType localIdx = 0; localIdx < rowLength && compute; localIdx++ )
+      {
+        IndexType elementColumn( 0 );
+        RealType elementValue( 0.0 );
+        matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue );
+        if( elementValue != 0.0 )
+            function( rowIdx, localIdx, elementColumn, elementValue, compute );
+      }
+   };
+   Algorithms::ParallelFor< Device_ >::exec( first, last, processRow );
+// Non-const variant of forRows.
+// The matrix holds no element storage that could be modified, so the call is
+// forwarded to the const overload with the same row range and function.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Function >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+forRows( IndexType first, IndexType last, Function& function )
+   // The cast selects the const overload explicitly; calling this->forRows
+   // here would resolve to this very method and recurse without end.
+   static_cast< const LambdaMatrix& >( *this ).forRows( first, last, function );
+// One SOR iteration step for the given row.
+// NOTE(review): no statements of the body are visible in this diff, the
+// method appears to be an unimplemented placeholder - confirm.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Vector1, typename Vector2 >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+performSORIteration( const Vector1& b,
+                          const IndexType row,
+                          Vector2& x,
+                          const RealType& omega ) const
+// Prints the matrix to the given stream, one line per row in the form
+//    Row: <row> ->  Col:<column>-><value>\t ...
+// Only elements with non-zero value are printed.
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+print( std::ostream& str ) const
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      // Every ( row, column ) position is queried separately via getElement,
+      // i.e. getRows() * getColumns() calls in total.
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+      {
+         auto value = this->getElement( row, column );
+         if( value != ( RealType ) 0 )
+            str << " Col:" << column << "->" << value << "\t";
+      }
+      str << std::endl;
+   }
+} //namespace Matrices
+} //namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 032767518cc275e707961af48e474f4210256ef1..d012f918b82347b6aaa54f9eb3059e45805323c4 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -10,6 +10,7 @@
 #pragma once
+#include <map>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Allocators/Default.h>
@@ -92,6 +93,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+      // Constructor setting the matrix dimensions and the matrix elements
+      // from a map. The map key is a pair ( row index, column index ) and the
+      // mapped value is the value of the matrix element.
+      template< typename MapIndex,
+                typename MapValue >
+      explicit SparseMatrix( const IndexType rows,
+                             const IndexType columns,
+                             const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map );
       ViewType getView() const; // TODO: remove const
       ConstViewType getConstView() const;
@@ -110,6 +117,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data );
+      // Sets the matrix elements from a map. The map key is a pair
+      // ( row index, column index ) and the mapped value is the value of the
+      // matrix element. The matrix dimensions must be set beforehand.
+      template< typename MapIndex,
+                typename MapValue >
+      void setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map );
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
@@ -225,6 +236,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       IndexType getPaddingIndex() const;
+      // Accessors returning a reference to the segments member of the matrix.
+      SegmentsType& getSegments();
+      const SegmentsType& getSegments() const;
 // TODO: restore it and also in Matrix
 //   protected:
@@ -234,8 +249,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       IndexAllocator indexAllocator;
-      //RealAllocator realAllocator;
       ViewType view;
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 4c1f3b1ce41d27adf2a804e7171ec21f89ba7313..3a557d188113484b320cd56a51f4303e191be05e 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -109,6 +109,24 @@ SparseMatrix( const IndexType rows,
    this->setElements( data );
+// Constructor from dimensions and a map of matrix elements.
+// It sets the dimensions first and then delegates to setElements( map ).
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename MapIndex,
+             typename MapValue >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const IndexType rows,
+              const IndexType columns,
+              const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map )
+   this->setDimensions( rows, columns );
+   this->setElements( map );
 template< typename Real,
           typename Device,
           typename Index,
@@ -247,6 +265,38 @@ setElements( const std::initializer_list< std::tuple< IndexType, IndexType, Real
    ( *this ) = hostMatrix;
+// Sets the matrix elements from a map whose key is a pair
+// ( row index, column index ) and whose mapped value is the element value.
+//
+// The row capacities are obtained by counting the map entries of each row.
+// For a matrix which is not allocated on the host, the elements are inserted
+// into a temporary host matrix which is then assigned to this matrix.
+//
+// NOTE(review): row indexes in the map are used to index a vector of size
+// getRows() without any check - they are assumed to be within the matrix
+// dimensions.
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename MapIndex,
+             typename MapValue >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map )
+   Containers::Vector< IndexType, Devices::Host, IndexType > rowsCapacities( this->getRows(), 0 );
+   // Map entries are taken by const reference to avoid copying each pair.
+   for( const auto& element : map )
+      rowsCapacities[ element.first.first ]++;
+   if( !std::is_same< DeviceType, Devices::Host >::value )
+   {
+      // Elements are set one by one on the host and transferred at once.
+      SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( this->getRows(), this->getColumns() );
+      hostMatrix.setCompressedRowLengths( rowsCapacities );
+      for( const auto& element : map )
+         hostMatrix.setElement( element.first.first, element.first.second, element.second );
+      *this = hostMatrix;
+   }
+   else
+   {
+      this->setCompressedRowLengths( rowsCapacities );
+      for( const auto& element : map )
+         this->setElement( element.first.first, element.first.second, element.second );
+   }
 template< typename Real,
           typename Device,
           typename Index,
@@ -816,7 +866,7 @@ operator=( const RHSMatrix& matrix )
                const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
                matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
                matrixValuesBuffer_view[ bufferIdx ] = value;
-               //std::cerr << " <<<<< rowIdx = " << rowIdx << " localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl;
+               //printf( "TO BUFFER: rowIdx = %d localIdx = %d bufferIdx = %d column = %d value = %d \n", rowIdx, localIdx, bufferIdx, columnIndex, value );
          matrix.forRows( baseRow, lastRow, f1 );
@@ -951,5 +1001,33 @@ getPaddingIndex() const
    return -1;
+// Returns a modifiable reference to the segments member of the matrix.
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getSegments() -> SegmentsType&
+   return this->segments;
+// Returns a constant reference to the segments member of the matrix.
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getSegments() const -> const SegmentsType&
+   return this->segments;
    } //namespace Matrices
 } // namespace  TNL
diff --git a/src/TNL/String.h b/src/TNL/String.h
index f35abc377177b6b061b68074714ce3e143b55d22..228cb5bcb18d07cdf493ff9fdbc36c9539baef63 100644
--- a/src/TNL/String.h
+++ b/src/TNL/String.h
@@ -375,7 +375,7 @@ String convertToString( const T& value )
  * \brief Specialization of function \ref convertToString for boolean.
- * The boolean type is converted to 'true' ot 'false'.
+ * The boolean type is converted to 'true' or 'false'.
 template<> inline String convertToString( const bool& b )
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index 255a67fb911b995ea409c341fe9a00104b5d95bf..4f6fd7c92c8ea1198e6c9e521c26951df5060e15 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -149,6 +149,11 @@ TYPED_TEST( ArrayTest, constructors )
    EXPECT_EQ( w.getSize(), 10 );
+   Containers::Array< int > int_array( 10, 1 );
+   ArrayType int_array_copy( int_array );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( int_array_copy.getElement( i ), 1 );
    ArrayType a1 { 1, 2, 3 };
    EXPECT_EQ( a1.getElement( 0 ), 1 );
    EXPECT_EQ( a1.getElement( 1 ), 2 );
diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h
index b6602ba141dbeb3a45470c58cf6130544b85d944..ca495abba4ab720deed2d7eb2c5d482229973fdc 100644
--- a/src/UnitTests/Containers/VectorTest.h
+++ b/src/UnitTests/Containers/VectorTest.h
@@ -72,6 +72,12 @@ TYPED_TEST( VectorTest, constructors )
    EXPECT_EQ( a3.getElement( 0 ), 7 );
    EXPECT_EQ( a3.getElement( 1 ), 8 );
    EXPECT_EQ( a3.getElement( 2 ), 9 );
+   VectorType a4( 2 * a2 + 3 * a3 );
+   EXPECT_EQ( a4.getElement( 0 ), 2.0 * a2.getElement( 0 ) + 3 * a3.getElement( 0 ) );
+   EXPECT_EQ( a4.getElement( 1 ), 2.0 * a2.getElement( 1 ) + 3 * a3.getElement( 1 ) );
+   EXPECT_EQ( a4.getElement( 2 ), 2.0 * a2.getElement( 2 ) + 3 * a3.getElement( 2 ) );
 TEST( VectorSpecialCasesTest, defaultConstructors )
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 1c536a98210b59789d2a7b34a9b9935150a7e0ac..f2ffd0c4bab1af47284ee98a09ff1f1002c5647b 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -22,6 +22,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
    CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
@@ -40,6 +43,9 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   CUDA_ADD_EXECUTABLE( LambdaMatrixTest LambdaMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
@@ -69,6 +75,10 @@ ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp )
+   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
    ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
@@ -93,6 +103,10 @@ ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE( LambdaMatrixTest LambdaMatrixTest.cpp )
@@ -102,12 +116,14 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT
 ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 37ae58bf1a1e7e8b03220c4916ba79cf48729ef9..8791b51fa6d8eb14a79f032a0cac0d1d91c653fd 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -46,42 +46,68 @@ void test_GetSerializationType()
 template< typename Matrix >
 void test_SetDimensions()
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
-    const IndexType rows = 9;
-    const IndexType cols = 8;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
-    Matrix m;
-    m.setDimensions( rows, cols );
+   Matrix m;
+   m.setDimensions( rows, cols );
-    EXPECT_EQ( m.getRows(), 9 );
-    EXPECT_EQ( m.getColumns(), 8 );
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 template< typename Matrix1, typename Matrix2 >
 void test_SetLike()
-    using RealType = typename Matrix1::RealType;
-    using DeviceType = typename Matrix1::DeviceType;
-    using IndexType = typename Matrix1::IndexType;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
-    const IndexType rows = 8;
-    const IndexType cols = 7;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
-    Matrix1 m1;
-    m1.reset();
-    m1.setDimensions( rows + 1, cols + 2 );
+   Matrix1 m1;
+   m1.reset();
+   m1.setDimensions( rows + 1, cols + 2 );
-    Matrix2 m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   Matrix2 m2;
+   m2.reset();
+   m2.setDimensions( rows, cols );
+   m1.setLike( m2 );
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
-    m1.setLike( m2 );
+template< typename Matrix >
+void test_SetElements()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
-    EXPECT_EQ( m1.getRows(), m2.getRows() );
-    EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+   Matrix m( {
+      { 1, 2, 3 },
+      { 4, 5, 6 },
+      { 7, 8, 9 },
+   } );
+   EXPECT_EQ( m.getRows(), 3 );
+   EXPECT_EQ( m.getColumns(), 3 );
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 2 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 3 );
+   EXPECT_EQ( m.getElement( 1, 0 ), 4 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 5 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 6 );
+   EXPECT_EQ( m.getElement( 2, 0 ), 7 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 8 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 9 );
 template< typename Matrix >
@@ -1386,6 +1412,13 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
+// Runs test_SetElements for the matrix type of the fixture.
+TYPED_TEST( MatrixTest, setElementsTest )
+    using MatrixType = typename TestFixture::MatrixType;
+    test_SetElements< MatrixType >();
 TYPED_TEST( MatrixTest, getRowLengthTest )
     using MatrixType = typename TestFixture::MatrixType;
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.cpp b/src/UnitTests/Matrices/LambdaMatrixTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9da03ede3efa0addfc30c3245f26f35971d99514
--- /dev/null
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.cpp
@@ -0,0 +1,11 @@
+                          LambdaMatrixTest.cpp -  description
+                             -------------------
+    begin                : Mar 18, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include "LambdaMatrixTest.h"
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.cu b/src/UnitTests/Matrices/LambdaMatrixTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f7af7af4160ceaf59c39b95759936560bf0868f0
--- /dev/null
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.cu
@@ -0,0 +1,11 @@
+                          LambdaMatrixTest.cu -  description
+                             -------------------
+    begin                : Mar 18, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include "LambdaMatrixTest.h"
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.h b/src/UnitTests/Matrices/LambdaMatrixTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..07d1f336c4d5cc76c74bf19711fc4d2b68d3684b
--- /dev/null
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.h
@@ -0,0 +1,115 @@
+                          LambdaMatrixTest.h -  description
+                             -------------------
+    begin                : Mar 18, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Devices/AnyDevice.h>
+#include <TNL/Devices/Host.h>
+#include "LambdaMatrixTest.hpp"
+#include <iostream>
+// Bundle of the Real, Device and Index types used as the type parameter of
+// the typed tests below. The test functions read RealType, DeviceType and
+// IndexType from it.
+template< typename Real,
+          typename Device,
+          typename Index >
+struct LambdaMatrixParameters
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+// test fixture for typed tests; LambdaMatrixType is the fixture's template
+// argument, i.e. one of the types listed in LambdaMatrixTypes
+template< typename Matrix >
+class LambdaMatrixTest : public ::testing::Test
+   using LambdaMatrixType = Matrix;
+// types for which LambdaMatrixTest is instantiated
+// (the Cuda variants are added only when HAVE_CUDA is defined)
+using LambdaMatrixTypes = ::testing::Types
+   LambdaMatrixParameters< int,    TNL::Devices::Host, int >,
+   LambdaMatrixParameters< long,   TNL::Devices::Host, int >,
+   LambdaMatrixParameters< float,  TNL::Devices::Host, int >,
+   LambdaMatrixParameters< double, TNL::Devices::Host, int >,
+   LambdaMatrixParameters< int,    TNL::Devices::Host, long >,
+   LambdaMatrixParameters< long,   TNL::Devices::Host, long >,
+   LambdaMatrixParameters< float,  TNL::Devices::Host, long >,
+   LambdaMatrixParameters< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,LambdaMatrixParameters< int,    TNL::Devices::Cuda, int >,
+   LambdaMatrixParameters< long,   TNL::Devices::Cuda, int >,
+   LambdaMatrixParameters< float,  TNL::Devices::Cuda, int >,
+   LambdaMatrixParameters< double, TNL::Devices::Cuda, int >,
+   LambdaMatrixParameters< int,    TNL::Devices::Cuda, long >,
+   LambdaMatrixParameters< long,   TNL::Devices::Cuda, long >,
+   LambdaMatrixParameters< float,  TNL::Devices::Cuda, long >,
+   LambdaMatrixParameters< double, TNL::Devices::Cuda, long >
+TYPED_TEST_SUITE( LambdaMatrixTest, LambdaMatrixTypes);
+// Each typed test below only forwards the parameter bundle of the fixture
+// to the corresponding test_* function template.
+
+// Runs test_Constructors.
+TYPED_TEST( LambdaMatrixTest, Constructors )
+   using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+   test_Constructors< LambdaMatrixParametersType >();
+// Runs test_SetDimensions.
+TYPED_TEST( LambdaMatrixTest, setDimensionsTest )
+   using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+   test_SetDimensions< LambdaMatrixParametersType >();
+// Runs test_GetCompressedRowLengths.
+TYPED_TEST( LambdaMatrixTest, getCompressedRowLengthsTest )
+   using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+   test_GetCompressedRowLengths< LambdaMatrixParametersType >();
+// Runs test_GetElement.
+TYPED_TEST( LambdaMatrixTest, getElementTest )
+   using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+   test_GetElement< LambdaMatrixParametersType >();
+// Runs test_VectorProduct.
+TYPED_TEST( LambdaMatrixTest, vectorProductTest )
+    using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+    test_VectorProduct< LambdaMatrixParametersType >();
+// Runs test_RowsReduction.
+TYPED_TEST( LambdaMatrixTest, rowsReduction )
+    using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+    test_RowsReduction< LambdaMatrixParametersType >();
+// Runs test_Print.
+TYPED_TEST( LambdaMatrixTest, printTest )
+    using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+    test_Print< LambdaMatrixParametersType >();
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.hpp b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..23963c11906431fa6f80926cb21c7d5d12913c51
--- /dev/null
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
@@ -0,0 +1,306 @@
+                          LambdaMatrixTest.hpp -  description
+                             -------------------
+    begin                : Mar 18, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include <iostream>
+#include <sstream>
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_Constructors() // Constructs a size x size lambda matrix and checks the dimensions it reports.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { return 1; }; // every row reports one entry
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes the entry through the columnIdx/value out-parameters
+         columnIdx = rowIdx; // entry sits on the diagonal
+         value =  1.0;
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType m( size, size, matrixElements, rowLengths );
+   EXPECT_EQ( m.getRows(), size );
+   EXPECT_EQ( m.getColumns(), size );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_SetDimensions() // Checks that setDimensions() changes the row/column counts reported by the matrix.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { return 1; }; // every row reports one entry
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes the entry through the columnIdx/value out-parameters
+         columnIdx = rowIdx; // entry sits on the diagonal
+         value =  1.0;
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType m( size, size, matrixElements, rowLengths );
+   EXPECT_EQ( m.getRows(), size ); // sanity check of the initial 5 x 5 dimensions
+   EXPECT_EQ( m.getColumns(), size );
+   m.setDimensions( 10, 10 ); // resize; the lambdas themselves are left untouched
+   EXPECT_EQ( m.getRows(), 10 );
+   EXPECT_EQ( m.getColumns(), 10 );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_GetCompressedRowLengths() // Checks the per-row lengths returned by getCompressedRowLengths() for a 5 x 5 lambda matrix.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { // first and last row: 1 entry, all other rows: 3 entries
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;
+      return 3;
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes column index and value of the localIdx-th entry of row rowIdx
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx; // boundary rows: single diagonal entry with value 1
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1; // localIdx 0, 1, 2 maps to columns rowIdx-1, rowIdx, rowIdx+1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; // -2 on the diagonal, 1 on both neighbours
+      }
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType m( size, size, matrixElements, rowLengths );
+   TNL::Containers::Vector< IndexType > correctRowLengths{ 1, 3, 3, 3, 1 }; // expected values, matching the rowLengths lambda for size == 5
+   TNL::Containers::Vector< IndexType > rowLengthsVector; // NOTE(review): uses Vector's default device/index arguments while the matrix uses DeviceType - confirm this is intended for the Cuda instantiations
+   m.getCompressedRowLengths( rowLengthsVector );
+   for( int i = 0; i < size; i++ ) // NOTE(review): int index compared against IndexType size (may be long)
+      EXPECT_EQ( correctRowLengths[ i ], rowLengthsVector[ i ] );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_GetElement() // Reads all 25 entries of a 5 x 5 lambda matrix through getElement() and compares them with the expected values.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { // first and last row: 1 entry, all other rows: 3 entries
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;
+      return 3;
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes column index and value of the localIdx-th entry of row rowIdx
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx; // boundary rows: single diagonal entry with value 1
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1; // localIdx 0, 1, 2 maps to columns rowIdx-1, rowIdx, rowIdx+1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; // -2 on the diagonal, 1 on both neighbours
+      }
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType m( size, size, matrixElements, rowLengths );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1.0 ); // row 0 expected: [ 1 0 0 0 0 ]
+   EXPECT_EQ( m.getElement( 0, 1 ),  0.0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0.0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0.0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0.0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  1.0 ); // row 1 expected: [ 1 -2 1 0 0 ]
+   EXPECT_EQ( m.getElement( 1, 1 ), -2.0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  1.0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0.0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0.0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0.0 ); // row 2 expected: [ 0 1 -2 1 0 ]
+   EXPECT_EQ( m.getElement( 2, 1 ),  1.0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), -2.0 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  1.0 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0.0 );
+   EXPECT_EQ( m.getElement( 3, 0 ),  0.0 ); // row 3 expected: [ 0 0 1 -2 1 ]
+   EXPECT_EQ( m.getElement( 3, 1 ),  0.0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1.0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), -2.0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  1.0 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0.0 ); // row 4 expected: [ 0 0 0 0 1 ]
+   EXPECT_EQ( m.getElement( 4, 1 ),  0.0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0.0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0.0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  1.0 );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_VectorProduct() // Calls vectorProduct( x, b ) on a 5 x 5 lambda matrix and checks the resulting entries of b.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { // first and last row: 1 entry, all other rows: 3 entries
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;
+      return 3;
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes column index and value of the localIdx-th entry of row rowIdx
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx; // boundary rows: single diagonal entry with value 1
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1; // localIdx 0, 1, 2 maps to columns rowIdx-1, rowIdx, rowIdx+1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; // -2 on the diagonal, 1 on both neighbours
+      }
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType A( size, size, matrixElements, rowLengths );
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > x( size, 1.0 ), b( size, 5.0 ); // presumably x is filled with 1 and b with 5 (size, value constructor) - TODO confirm
+   A.vectorProduct( x, b );
+   EXPECT_EQ( b.getElement( 0 ),  1.0 ); // expected values equal the row sums of A: 1, 1-2+1, 1-2+1, 1-2+1, 1
+   EXPECT_EQ( b.getElement( 1 ),  0.0 );
+   EXPECT_EQ( b.getElement( 2 ),  0.0 );
+   EXPECT_EQ( b.getElement( 3 ),  0.0 );
+   EXPECT_EQ( b.getElement( 4 ),  1.0 );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_RowsReduction() // Sums each row of a 5 x 5 lambda matrix via allRowsReduction() and checks the per-row results.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { // first and last row: 1 entry, all other rows: 3 entries
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;
+      return 3;
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes column index and value of the localIdx-th entry of row rowIdx
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx; // boundary rows: single diagonal entry with value 1
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1; // localIdx 0, 1, 2 maps to columns rowIdx-1, rowIdx, rowIdx+1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; // -2 on the diagonal, 1 on both neighbours
+      }
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType A( size, size, matrixElements, rowLengths );
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > v( size, -1.0 ); // presumably pre-filled with -1, a value no expected result below equals - TODO confirm constructor semantics
+   auto vView = v.getView(); // the view is what the keep lambda captures by value
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType { // passes the matrix value through unchanged
+      return value;
+   };
+   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { // accumulates by addition
+      sum += value;
+   };
+   auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { // stores the reduced value of a row into v through the view
+      vView[ row ] = value;
+   };
+   A.allRowsReduction( fetch, reduce, keep, 0.0 ); // 0.0 is presumably the initial value of each row's sum - TODO confirm
+   EXPECT_EQ( v.getElement( 0 ),  1.0 ); // expected values equal the row sums of A: 1, 1-2+1, 1-2+1, 1-2+1, 1
+   EXPECT_EQ( v.getElement( 1 ),  0.0 );
+   EXPECT_EQ( v.getElement( 2 ),  0.0 );
+   EXPECT_EQ( v.getElement( 3 ),  0.0 );
+   EXPECT_EQ( v.getElement( 4 ),  1.0 );
+template< typename Matrix > // Matrix is used only for its RealType/DeviceType/IndexType aliases
+void test_Print() // Captures the output of print() for a 5 x 5 lambda matrix and compares it with a reference string.
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { // first and last row: 1 entry, all other rows: 3 entries
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;
+      return 3;
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { // writes column index and value of the localIdx-th entry of row rowIdx
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx; // boundary rows: single diagonal entry with value 1
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1; // localIdx 0, 1, 2 maps to columns rowIdx-1, rowIdx, rowIdx+1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; // -2 on the diagonal, 1 on both neighbours
+      }
+   };
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); // concrete matrix type is deduced from the factory's return type for these two lambdas
+   MatrixType m( size, size, matrixElements, rowLengths );
+   std::stringstream printed; // receives what print() writes
+   std::stringstream couted; // holds the reference text
+   // redirect std::cout into `printed` and remember the original buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   m.print( std::cout ); // everything written to std::cout now lands in `printed`
+   std::cout.rdbuf(old_buf); // restore the original std::cout buffer
+   couted << "Row: 0 ->  Col:0->1\t\n"
+             "Row: 1 ->  Col:0->1	 Col:1->-2	 Col:2->1\t\n"
+             "Row: 2 ->  Col:1->1	 Col:2->-2	 Col:3->1\t\n"
+             "Row: 3 ->  Col:2->1	 Col:3->-2	 Col:4->1\t\n"
+             "Row: 4 ->  Col:4->1\t\n";
+   EXPECT_EQ( printed.str(), couted.str() );
+#endif // HAVE_GTEST
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 30d3a692d1a843e90600bffa560314535762e7ad..12cdbeef3fca46946193ff95f7a9f8ab455e0d19 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -1,13 +1,15 @@
                           SparseMatrixTest.h -  description
-    begin                : Nov 22, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
 /* See Copyright Notice in tnl/Copyright */
+#pragma once
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
@@ -15,1391 +17,103 @@
 #include <iostream>
 #include <sstream>
+#include "SparseMatrixTest.hpp"
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
-template< typename MatrixHostFloat, typename MatrixHostInt >
-void host_test_GetType()
-   bool testRan = false;
-   EXPECT_TRUE( testRan );
-   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
-template< typename MatrixCudaFloat, typename MatrixCudaInt >
-void cuda_test_GetType()
-   bool testRan = false;
-   EXPECT_TRUE( testRan );
-   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+// test fixture for typed tests
 template< typename Matrix >
-void test_Constructors()
+class MatrixTest : public ::testing::Test
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   Matrix m1( 5, 6 );
-   EXPECT_EQ( m1.getRows(), 5 );
-   EXPECT_EQ( m1.getColumns(), 6 );
-   Matrix m2( {1, 2, 2, 2, 1 }, 5 );
-   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 };
-   m2.setElement( 0, 0, 1 );   // 0th row
-   m2.setElement( 1, 0, 1 );   // 1st row
-   m2.setElement( 1, 1, 1 );
-   m2.setElement( 2, 1, 1 );   // 2nd row
-   m2.setElement( 2, 2, 1 );
-   m2.setElement( 3, 2, 1 );   // 3rd row
-   m2.setElement( 3, 3, 1 );
-   m2.setElement( 4, 4, 1 );   // 4th row
-   m2.getCompressedRowLengths( v1 );
-   EXPECT_EQ( v1, v2 );
-   /*
-    * Sets up the following 6x5 sparse matrix:
-    *
-    *    /  1  2  3  0  0 \
-    *    |  0  4  5  6  0 |
-    *    |  0  0  7  8  9 |
-    *    | 10  0  0  0  0 |
-    *    |  0 11  0  0  0 |
-    *    \  0  0  0 12  0 /
-    */
-   Matrix m3( 6, 5, {
-      { 0, 0,  1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 },
-      { 1, 1,  4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 },
-      { 2, 2,  7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 },
-      { 3, 0, 10.0 },
-      { 4, 1, 11.0 },
-      { 5, 3, 12.0 } } );
-   // Check the set elements
-   EXPECT_EQ( m3.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m3.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m3.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m3.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m3.getElement( 0, 4 ),  0 );
-   EXPECT_EQ( m3.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( m3.getElement( 1, 1 ),  4 );
-   EXPECT_EQ( m3.getElement( 1, 2 ),  5 );
-   EXPECT_EQ( m3.getElement( 1, 3 ),  6 );
-   EXPECT_EQ( m3.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m3.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m3.getElement( 2, 1 ),  0 );
-   EXPECT_EQ( m3.getElement( 2, 2 ),  7 );
-   EXPECT_EQ( m3.getElement( 2, 3 ),  8 );
-   EXPECT_EQ( m3.getElement( 2, 4 ),  9 );
-   EXPECT_EQ( m3.getElement( 3, 0 ), 10 );
-   EXPECT_EQ( m3.getElement( 3, 1 ),  0 );
-   EXPECT_EQ( m3.getElement( 3, 2 ),  0 );
-   EXPECT_EQ( m3.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m3.getElement( 3, 4 ),  0 );
+   using MatrixType = Matrix;
-   EXPECT_EQ( m3.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m3.getElement( 4, 1 ), 11 );
-   EXPECT_EQ( m3.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m3.getElement( 4, 3 ),  0 );
-   EXPECT_EQ( m3.getElement( 4, 4 ),  0 );
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes);
-   EXPECT_EQ( m3.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m3.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m3.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m3.getElement( 5, 3 ), 12 );
-   EXPECT_EQ( m3.getElement( 5, 4 ),  0 );
-template< typename Matrix >
-void test_SetDimensions()
+TYPED_TEST( MatrixTest, Constructors )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   const IndexType rows = 9;
-   const IndexType cols = 8;
+    using MatrixType = typename TestFixture::MatrixType;
-   Matrix m;
-   m.setDimensions( rows, cols );
-   EXPECT_EQ( m.getRows(), 9 );
-   EXPECT_EQ( m.getColumns(), 8 );
+    test_Constructors< MatrixType >();
-template< typename Matrix >
-void test_SetCompressedRowLengths()
+TYPED_TEST( MatrixTest, setDimensionsTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   const IndexType rows = 10;
-   const IndexType cols = 11;
-   Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
-   IndexType rowLength = 1;
-   for( IndexType i = 2; i < rows; i++ )
-      rowLengths.setElement( i, rowLength++ );
-   m.setCompressedRowLengths( rowLengths );
-   // Insert values into the rows.
-   RealType value = 1;
-   for( IndexType i = 0; i < 3; i++ )      // 0th row
-      m.setElement( 0, i, value++ );
-   for( IndexType i = 0; i < 3; i++ )      // 1st row
-      m.setElement( 1, i, value++ );
-   for( IndexType i = 0; i < 1; i++ )      // 2nd row
-      m.setElement( 2, i, value++ );
-   for( IndexType i = 0; i < 2; i++ )      // 3rd row
-      m.setElement( 3, i, value++ );
-   for( IndexType i = 0; i < 3; i++ )      // 4th row
-      m.setElement( 4, i, value++ );
+    using MatrixType = typename TestFixture::MatrixType;
-   for( IndexType i = 0; i < 4; i++ )      // 5th row
-      m.setElement( 5, i, value++ );
-   for( IndexType i = 0; i < 5; i++ )      // 6th row
-      m.setElement( 6, i, value++ );
-   for( IndexType i = 0; i < 6; i++ )      // 7th row
-      m.setElement( 7, i, value++ );
-   for( IndexType i = 0; i < 7; i++ )      // 8th row
-      m.setElement( 8, i, value++ );
-   for( IndexType i = 0; i < 8; i++ )      // 9th row
-      m.setElement( 9, i, value++ );
-   rowLengths = 0;
-   m.getCompressedRowLengths( rowLengths );
-   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
-   EXPECT_EQ( rowLengths, correctRowLengths );
+    test_SetDimensions< MatrixType >();
-template< typename Matrix1, typename Matrix2 >
-void test_SetLike()
+TYPED_TEST( MatrixTest, setCompressedRowLengthsTest )
-   using RealType = typename Matrix1::RealType;
-   using DeviceType = typename Matrix1::DeviceType;
-   using IndexType = typename Matrix1::IndexType;
+    using MatrixType = typename TestFixture::MatrixType;
-   const IndexType rows = 8;
-   const IndexType cols = 7;
-   Matrix1 m1( rows + 1, cols + 2 );
-   Matrix2 m2( rows, cols );
-   m1.setLike( m2 );
-   EXPECT_EQ( m1.getRows(), m2.getRows() );
-   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+    test_SetCompressedRowLengths< MatrixType >();
-template< typename Matrix >
-void test_GetNumberOfNonzeroMatrixElements()
+TYPED_TEST( MatrixTest, setLikeTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 10x10 sparse matrix:
-    *
-    *    /  1  0  2  0  3  0  4  0  0  0  \
-    *    |  5  6  7  0  0  0  0  0  0  0  |
-    *    |  8  9 10 11 12 13 14 15  0  0  |
-    *    | 16 17  0  0  0  0  0  0  0  0  |
-    *    | 18  0  0  0  0  0  0  0  0  0  |
-    *    | 19  0  0  0  0  0  0  0  0  0  |
-    *    | 20  0  0  0  0  0  0  0  0  0  |
-    *    | 21  0  0  0  0  0  0  0  0  0  |
-    *    | 22 23 24 25 26 27 28 29 30 31  |
-    *    \ 32 33 34 35 36 37 38 39 40 41 /
-    */
-   const IndexType rows = 10;
-   const IndexType cols = 10;
-   Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
-   m.setCompressedRowLengths( rowLengths );
-   RealType value = 1;
-   for( IndexType i = 0; i < 4; i++ )
-      m.setElement( 0, 2 * i, value++ );
-   for( IndexType i = 0; i < 3; i++ )
-      m.setElement( 1, i, value++ );
-   for( IndexType i = 0; i < 8; i++ )
-      m.setElement( 2, i, value++ );
-   for( IndexType i = 0; i < 2; i++ )
-      m.setElement( 3, i, value++ );
+    using MatrixType = typename TestFixture::MatrixType;
-   for( IndexType i = 4; i < 8; i++ )
-      m.setElement( i, 0, value++ );
-   for( IndexType j = 8; j < rows; j++)
-      for( IndexType i = 0; i < cols; i++ )
-         m.setElement( j, i, value++ );
-   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+    test_SetLike< MatrixType, MatrixType >();
-template< typename Matrix >
-void test_Reset()
+TYPED_TEST( MatrixTest, resetTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 5x4 sparse matrix:
-    *
-    *    /  0  0  0  0 \
-    *    |  0  0  0  0 |
-    *    |  0  0  0  0 |
-    *    |  0  0  0  0 |
-    *    \  0  0  0  0 /
-    */
+    using MatrixType = typename TestFixture::MatrixType;
-   const IndexType rows = 5;
-   const IndexType cols = 4;
-   Matrix m( rows, cols );
-   m.reset();
-   EXPECT_EQ( m.getRows(), 0 );
-   EXPECT_EQ( m.getColumns(), 0 );
+    test_Reset< MatrixType >();
-template< typename Matrix >
-void test_GetRow()
+TYPED_TEST( MatrixTest, getRowTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 10x10 sparse matrix:
-    *
-    *    /  1  0  2  0  3  0  4  0  0  0  \
-    *    |  5  6  7  0  0  0  0  0  0  0  |
-    *    |  8  9 10 11 12 13 14 15  0  0  |
-    *    | 16 17  0  0  0  0  0  0  0  0  |
-    *    | 18  0  0  0  0  0  0  0  0  0  |
-    *    | 19  0  0  0  0  0  0  0  0  0  |
-    *    | 20  0  0  0  0  0  0  0  0  0  |
-    *    | 21  0  0  0  0  0  0  0  0  0  |
-    *    | 22 23 24 25 26 27 28 29 30 31  |
-    *    \ 32 33 34 35 36 37 38 39 40 41 /
-    */
-   const IndexType rows = 10;
-   const IndexType cols = 10;
-   Matrix m( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
-   m.setCompressedRowLengths( rowLengths );
-   auto matrixView = m.getView();
-   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
-      auto row = matrixView.getRow( rowIdx );
-      RealType val;
-      switch( rowIdx )
-      {
-         case 0:
-            val = 1;
-            for( IndexType i = 0; i < 4; i++ )
-               row.setElement( i, 2 * i, val++ );
-            break;
-         case 1:
-            val = 5;
-            for( IndexType i = 0; i < 3; i++ )
-               row.setElement( i, i, val++ );
-            break;
-         case 2:
-            val = 8;
-            for( IndexType i = 0; i < 8; i++ )
-               row.setElement( i, i, val++ );
-            break;
-         case 3:
-            val = 16;
-            for( IndexType i = 0; i < 2; i++ )
-               row.setElement( i, i, val++ );
-            break;
-         case 4:
-            row.setElement( 0, 0, 18 );
-            break;
-         case 5:
-            row.setElement( 0, 0, 19 );
-            break;
-         case 6:
-            row.setElement( 0, 0, 20 );
-            break;
-         case 7:
-            row.setElement( 0, 0, 21 );
-            break;
-         case 8:
-            val = 22;
-            for( IndexType i = 0; i < rows; i++ )
-               row.setElement( i, i, val++ );
-            break;
-         case 9:
-            val = 32;
-            for( IndexType i = 0; i < rows; i++ )
-               row.setElement( i, i, val++ );
-            break;
-      }
-   };
-   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
-   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
-   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
-   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
-   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
-   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
-   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+    using MatrixType = typename TestFixture::MatrixType;
-   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
-   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
-   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
-   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
-   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
-   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
-   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
-   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
-   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
-   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
-   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
-   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
-   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
-   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
-   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
-   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
-   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
-   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
-   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
-   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
-   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+    test_GetRow< MatrixType >();
-template< typename Matrix >
-void test_SetElement()
+TYPED_TEST( MatrixTest, setElementTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 10x10 sparse matrix:
-    *
-    *    /  1  0  2  0  3  0  4  0  0  0  \
-    *    |  5  6  7  0  0  0  0  0  0  0  |
-    *    |  8  9 10 11 12 13 14 15  0  0  |
-    *    | 16 17  0  0  0  0  0  0  0  0  |
-    *    | 18  0  0  0  0  0  0  0  0  0  |
-    *    | 19  0  0  0  0  0  0  0  0  0  |
-    *    | 20  0  0  0  0  0  0  0  0  0  |
-    *    | 21  0  0  0  0  0  0  0  0  0  |
-    *    | 22 23 24 25 26 27 28 29 30 31  |
-    *    \ 32 33 34 35 36 37 38 39 40 41 /
-    */
-   const IndexType rows = 10;
-   const IndexType cols = 10;
-   Matrix m;
-   m.reset();
-   m.setDimensions( rows, cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
-   m.setCompressedRowLengths( rowLengths );
-   RealType value = 1;
-   for( IndexType i = 0; i < 4; i++ )
-      m.setElement( 0, 2 * i, value++ );
-   for( IndexType i = 0; i < 3; i++ )
-      m.setElement( 1, i, value++ );
-   for( IndexType i = 0; i < 8; i++ )
-      m.setElement( 2, i, value++ );
-   for( IndexType i = 0; i < 2; i++ )
-      m.setElement( 3, i, value++ );
-   for( IndexType i = 4; i < 8; i++ )
-      m.setElement( i, 0, value++ );
+    using MatrixType = typename TestFixture::MatrixType;
-   for( IndexType j = 8; j < rows; j++)
-      for( IndexType i = 0; i < cols; i++ )
-         m.setElement( j, i, value++ );
-   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
-   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
-   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
-   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
-   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
-   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
-   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
-   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
-   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
-   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
-   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
-   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
-   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
-   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
-   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
-   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
-   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
-   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
-   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
-   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
-   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
-   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
-   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
-   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
-   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
-   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
-   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
-   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+    test_SetElement< MatrixType >();
-template< typename Matrix >
-void test_AddElement()
+TYPED_TEST( MatrixTest, addElementTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 6x5 sparse matrix:
-    *
-    *    /  1  2  3  0  0 \
-    *    |  0  4  5  6  0 |
-    *    |  0  0  7  8  9 |
-    *    | 10  1  1  0  0 |
-    *    |  0 11  1  1  0 |
-    *    \  0  0  1 12  1 /
-    */
-   const IndexType rows = 6;
-   const IndexType cols = 5;
-   Matrix m( rows, cols, {
-      { 0, 0,  1 }, { 0, 1,  2 }, { 0, 2, 3 },
-                    { 1, 1,  4 }, { 1, 2, 5 }, { 1, 3,  6 },
-                                  { 2, 2, 7 }, { 2, 3,  8 }, { 2, 4, 9 },
-      { 3, 0, 10 }, { 3, 1,  1 }, { 3, 2, 1 },
-                    { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3,  1 },
-                                  { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } );
-   /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
-   m.setCompressedRowLengths( rowLengths );
-   RealType value = 1;
-   for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
-      m.setElement( 0, i, value++ );
-   for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
-      m.setElement( 1, i, value++ );
+    using MatrixType = typename TestFixture::MatrixType;
-   for( IndexType i = 2; i < cols; i++ )         // 2nd row
-      m.setElement( 2, i, value++ );
-   m.setElement( 3, 0, value++ );      // 3rd row
-   m.setElement( 4, 1, value++ );      // 4th row
-   m.setElement( 5, 3, value++ );      // 5th row*/
-   // Check the set elements
-   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
-   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
-   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
-   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
-   EXPECT_EQ( m.getElement( 2, 4 ),  9 );
-   EXPECT_EQ( m.getElement( 3, 0 ), 10 );
-   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
-   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ), 11 );
-   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
-   EXPECT_EQ( m.getElement( 4, 3 ),  1 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ),  1 );
-   EXPECT_EQ( m.getElement( 5, 3 ), 12 );
-   EXPECT_EQ( m.getElement( 5, 4 ),  1 );
-   // Add new elements to the old elements with a multiplying factor applied to the old elements.
-   /*
-    * The following setup results in the following 6x5 sparse matrix:
-    *
-    *    /  3  6  9  0  0 \
-    *    |  0 12 15 18  0 |
-    *    |  0  0 21 24 27 |
-    *    | 30 13 14  0  0 |
-    *    |  0 35 16 17  0 |
-    *    \  0  0 18 41 20 /
-    */
-   RealType newValue = 1;
-   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
-      m.addElement( 0, i, newValue++, 2.0 );
-   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
-      m.addElement( 1, i, newValue++, 2.0 );
-   for( IndexType i = 2; i < cols; i++ )             // 2nd row
-      m.addElement( 2, i, newValue++, 2.0 );
-   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
-      m.addElement( 3, i, newValue++, 2.0 );
-   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
-      m.addElement( 4, i, newValue++, 2.0 );
-   for( IndexType i = 2; i < cols; i++ )             // 5th row
-      m.addElement( 5, i, newValue++, 2.0 );
-   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
-   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
-   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
-   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
-   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
-   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
-   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
-   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
-   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
-   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
-   EXPECT_EQ( m.getElement( 3, 1 ), 13 );
-   EXPECT_EQ( m.getElement( 3, 2 ), 14 );
-   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
-   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
-   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
-   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
-   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
-   EXPECT_EQ( m.getElement( 5, 2 ), 18 );
-   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
-   EXPECT_EQ( m.getElement( 5, 4 ), 20 );
+    test_AddElement< MatrixType >();
-template< typename Matrix >
-void test_VectorProduct()
+TYPED_TEST( MatrixTest, vectorProductTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-   /*
-    * Sets up the following 4x4 sparse matrix:
-    *
-    *    /  1  0  0  0 \
-    *    |  0  2  0  3 |
-    *    |  0  4  0  0 |
-    *    \  0  0  5  0 /
-    */
-   const IndexType m_rows_1 = 4;
-   const IndexType m_cols_1 = 4;
-   Matrix m_1;
-   m_1.reset();
-   m_1.setDimensions( m_rows_1, m_cols_1 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 };
-   m_1.setCompressedRowLengths( rowLengths_1 );
-   RealType value_1 = 1;
-   m_1.setElement( 0, 0, value_1++ );      // 0th row
-   m_1.setElement( 1, 1, value_1++ );      // 1st row
-   m_1.setElement( 1, 3, value_1++ );
-   m_1.setElement( 2, 1, value_1++ );      // 2nd row
-   m_1.setElement( 3, 2, value_1++ );      // 3rd row
+    using MatrixType = typename TestFixture::MatrixType;
-   VectorType inVector_1;
-   inVector_1.setSize( m_cols_1 );
-   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
-       inVector_1.setElement( i, 2 );
-   VectorType outVector_1;
-   outVector_1.setSize( m_rows_1 );
-   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
-       outVector_1.setElement( j, 0 );
-   m_1.vectorProduct( inVector_1, outVector_1 );
-   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
-   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
-   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
-   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
-   /*
-    * Sets up the following 4x4 sparse matrix:
-    *
-    *    /  1  2  3  0 \
-    *    |  0  0  0  4 |
-    *    |  5  6  7  0 |
-    *    \  0  8  0  0 /
-    */
-   const IndexType m_rows_2 = 4;
-   const IndexType m_cols_2 = 4;
-   Matrix m_2( m_rows_2, m_cols_2 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
-   m_2.setCompressedRowLengths( rowLengths_2 );
-   RealType value_2 = 1;
-   for( IndexType i = 0; i < 3; i++ )      // 0th row
-      m_2.setElement( 0, i, value_2++ );
-   m_2.setElement( 1, 3, value_2++ );      // 1st row
-   for( IndexType i = 0; i < 3; i++ )      // 2nd row
-      m_2.setElement( 2, i, value_2++ );
-   for( IndexType i = 1; i < 2; i++ )      // 3rd row
-      m_2.setElement( 3, i, value_2++ );
-   VectorType inVector_2;
-   inVector_2.setSize( m_cols_2 );
-   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
-      inVector_2.setElement( i, 2 );
-   VectorType outVector_2;
-   outVector_2.setSize( m_rows_2 );
-   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
-      outVector_2.setElement( j, 0 );
-   m_2.vectorProduct( inVector_2, outVector_2 );
-   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
-   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
-   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
-   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
-   /*
-    * Sets up the following 4x4 sparse matrix:
-    *
-    *    /  1  2  3  0 \
-    *    |  0  4  5  6 |
-    *    |  7  8  9  0 |
-    *    \  0 10 11 12 /
-    */
-   const IndexType m_rows_3 = 4;
-   const IndexType m_cols_3 = 4;
-   Matrix m_3( m_rows_3, m_cols_3 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
-   m_3.setCompressedRowLengths( rowLengths_3 );
-   RealType value_3 = 1;
-   for( IndexType i = 0; i < 3; i++ )          // 0th row
-      m_3.setElement( 0, i, value_3++ );
-   for( IndexType i = 1; i < 4; i++ )
-      m_3.setElement( 1, i, value_3++ );      // 1st row
-   for( IndexType i = 0; i < 3; i++ )          // 2nd row
-      m_3.setElement( 2, i, value_3++ );
-   for( IndexType i = 1; i < 4; i++ )          // 3rd row
-      m_3.setElement( 3, i, value_3++ );
-   VectorType inVector_3;
-   inVector_3.setSize( m_cols_3 );
-   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
-      inVector_3.setElement( i, 2 );
-   VectorType outVector_3;
-   outVector_3.setSize( m_rows_3 );
-   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
-      outVector_3.setElement( j, 0 );
-   m_3.vectorProduct( inVector_3, outVector_3 );
-   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
-   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
-   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
-   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
-   /*
-    * Sets up the following 8x8 sparse matrix:
-    *
-    *    /  1  2  3  0  0  4  0  0 \
-    *    |  0  5  6  7  8  0  0  0 |
-    *    |  9 10 11 12 13  0  0  0 |
-    *    |  0 14 15 16 17  0  0  0 |
-    *    |  0  0 18 19 20 21  0  0 |
-    *    |  0  0  0 22 23 24 25  0 |
-    *    | 26 27 28 29 30  0  0  0 |
-    *    \ 31 32 33 34 35  0  0  0 /
-    */
-   const IndexType m_rows_4 = 8;
-   const IndexType m_cols_4 = 8;
-   Matrix m_4( m_rows_4, m_cols_4 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
-   m_4.setCompressedRowLengths( rowLengths_4 );
-   RealType value_4 = 1;
-   for( IndexType i = 0; i < 3; i++ )       // 0th row
-      m_4.setElement( 0, i, value_4++ );
-   m_4.setElement( 0, 5, value_4++ );
-   for( IndexType i = 1; i < 5; i++ )       // 1st row
-      m_4.setElement( 1, i, value_4++ );
-   for( IndexType i = 0; i < 5; i++ )       // 2nd row
-      m_4.setElement( 2, i, value_4++ );
-   for( IndexType i = 1; i < 5; i++ )       // 3rd row
-      m_4.setElement( 3, i, value_4++ );
-   for( IndexType i = 2; i < 6; i++ )       // 4th row
-      m_4.setElement( 4, i, value_4++ );
-   for( IndexType i = 3; i < 7; i++ )       // 5th row
-      m_4.setElement( 5, i, value_4++ );
-   for( IndexType i = 0; i < 5; i++ )       // 6th row
-      m_4.setElement( 6, i, value_4++ );
-   for( IndexType i = 0; i < 5; i++ )       // 7th row
-      m_4.setElement( 7, i, value_4++ );
-   VectorType inVector_4;
-   inVector_4.setSize( m_cols_4 );
-   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
-      inVector_4.setElement( i, 2 );
-   VectorType outVector_4;
-   outVector_4.setSize( m_rows_4 );
-   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
-      outVector_4.setElement( j, 0 );
-   m_4.vectorProduct( inVector_4, outVector_4 );
-   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
-   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
-   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
-   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
-   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
-   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
-   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
-   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
-   /*
-    * Sets up the following 8x8 sparse matrix:
-    *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
-    */
-   const IndexType m_rows_5 = 8;
-   const IndexType m_cols_5 = 8;
-   Matrix m_5( m_rows_5, m_cols_5 );
-   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
-   m_5.setCompressedRowLengths( rowLengths_5 );
-   RealType value_5 = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
-      m_5.setElement( 0, i, value_5++ );
-   m_5.setElement( 0, 4, value_5++ );           // 0th row
-   m_5.setElement( 0, 5, value_5++ );
-   m_5.setElement( 1, 1, value_5++ );           // 1st row
-   m_5.setElement( 1, 3, value_5++ );
-   for( IndexType i = 1; i < 3; i++ )            // 2nd row
-      m_5.setElement( 2, i, value_5++ );
-   m_5.setElement( 2, 4, value_5++ );           // 2nd row
-   for( IndexType i = 1; i < 5; i++ )            // 3rd row
-      m_5.setElement( 3, i, value_5++ );
-   m_5.setElement( 4, 1, value_5++ );           // 4th row
-   for( IndexType i = 1; i < 7; i++ )            // 5th row
-      m_5.setElement( 5, i, value_5++ );
-   for( IndexType i = 0; i < 7; i++ )            // 6th row
-      m_5.setElement( 6, i, value_5++ );
-   for( IndexType i = 0; i < 8; i++ )            // 7th row
-      m_5.setElement( 7, i, value_5++ );
-   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
-      m_5.setElement( i, 7, 1);
-   VectorType inVector_5;
-   inVector_5.setSize( m_cols_5 );
-   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
-       inVector_5.setElement( i, 2 );
-   VectorType outVector_5;
-   outVector_5.setSize( m_rows_5 );
-   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
-       outVector_5.setElement( j, 0 );
-   m_5.vectorProduct( inVector_5, outVector_5 );
-   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
-   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
-   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
-   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
-   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
-   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
-   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
-   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
-template< typename Matrix >
-void test_RowsReduction()
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 8x8 sparse matrix:
-    *
-    *    /  1  2  3  0  4  5  0  1 \   6
-    *    |  0  6  0  7  0  0  0  1 |   3
-    *    |  0  8  9  0 10  0  0  1 |   4
-    *    |  0 11 12 13 14  0  0  1 |   5
-    *    |  0 15  0  0  0  0  0  1 |   2
-    *    |  0 16 17 18 19 20 21  1 |   7
-    *    | 22 23 24 25 26 27 28  1 |   8
-    *    \ 29 30 31 32 33 34 35 36 /   8
-    */
-   const IndexType rows = 8;
-   const IndexType cols = 8;
-   Matrix m;
-   m.setDimensions( rows, cols );
-   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
-   m.setCompressedRowLengths( rowsCapacities );
-   RealType value = 1;
-   for( IndexType i = 0; i < 3; i++ )   // 0th row
-      m.setElement( 0, i, value++ );
-   m.setElement( 0, 4, value++ );       // 0th row
-   m.setElement( 0, 5, value++ );
-   m.setElement( 1, 1, value++ );       // 1st row
-   m.setElement( 1, 3, value++ );
-   for( IndexType i = 1; i < 3; i++ )   // 2nd row
-      m.setElement( 2, i, value++ );
-   m.setElement( 2, 4, value++ );       // 2nd row
-   for( IndexType i = 1; i < 5; i++ )   // 3rd row
-      m.setElement( 3, i, value++ );
-   m.setElement( 4, 1, value++ );       // 4th row
-   for( IndexType i = 1; i < 7; i++ )   // 5th row
-      m.setElement( 5, i, value++ );
-   for( IndexType i = 0; i < 7; i++ )   // 6th row
-      m.setElement( 6, i, value++ );
-   for( IndexType i = 0; i < 8; i++ )   // 7th row
-       m.setElement( 7, i, value++ );
-   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
-      m.setElement( i, 7, 1);
-   ////
-   // Compute number of non-zero elements in rows.
-   typename Matrix::RowsCapacitiesType rowLengths( rows );
-   auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
-      return ( value != 0.0 );
-   };
-   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
-   };
-   m.allRowsReduction( fetch, reduce, keep, 0 );
-   EXPECT_EQ( rowsCapacities, rowLengths );
-   m.getCompressedRowLengths( rowLengths );
-   EXPECT_EQ( rowsCapacities, rowLengths );
-   ////
-   // Compute max norm
-   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
-   auto rowSums_view = rowSums.getView();
-   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
-      return abs( value );
-   };
-   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowSums_view[ rowIdx ] = value;
-   };
-   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
-   const RealType maxNorm = TNL::max( rowSums );
-   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+    test_VectorProduct< MatrixType >();
-template< typename Matrix >
-void test_PerformSORIteration()
+TYPED_TEST( MatrixTest, rowsReduction )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 4x4 sparse matrix:
-    *
-    *    /  4  1  0  0 \
-    *    |  1  4  1  0 |
-    *    |  0  1  4  1 |
-    *    \  0  0  1  4 /
-    */
+    using MatrixType = typename TestFixture::MatrixType;
-   const IndexType m_rows = 4;
-   const IndexType m_cols = 4;
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
-   m.setCompressedRowLengths( rowLengths );
-   m.setElement( 0, 0, 4.0 );        // 0th row
-   m.setElement( 0, 1, 1.0);
-   m.setElement( 1, 0, 1.0 );        // 1st row
-   m.setElement( 1, 1, 4.0 );
-   m.setElement( 1, 2, 1.0 );
-   m.setElement( 2, 1, 1.0 );        // 2nd row
-   m.setElement( 2, 2, 4.0 );
-   m.setElement( 2, 3, 1.0 );
-   m.setElement( 3, 2, 1.0 );        // 3rd row
-   m.setElement( 3, 3, 4.0 );
-   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
-   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
-   IndexType row = 0;
-   RealType omega = 1;
-   m.performSORIteration( bVector, row++, xVector, omega);
-   EXPECT_EQ( xVector[ 0 ], 0.0 );
-   EXPECT_EQ( xVector[ 1 ], 1.0 );
-   EXPECT_EQ( xVector[ 2 ], 1.0 );
-   EXPECT_EQ( xVector[ 3 ], 1.0 );
-   m.performSORIteration( bVector, row++, xVector, omega);
-   EXPECT_EQ( xVector[ 0 ], 0.0 );
-   EXPECT_EQ( xVector[ 1 ], 0.0 );
-   EXPECT_EQ( xVector[ 2 ], 1.0 );
-   EXPECT_EQ( xVector[ 3 ], 1.0 );
-   m.performSORIteration( bVector, row++, xVector, omega);
-   EXPECT_EQ( xVector[ 0 ], 0.0 );
-   EXPECT_EQ( xVector[ 1 ], 0.0 );
-   EXPECT_EQ( xVector[ 2 ], 0.0 );
-   EXPECT_EQ( xVector[ 3 ], 1.0 );
-   m.performSORIteration( bVector, row++, xVector, omega);
-   EXPECT_EQ( xVector[ 0 ], 0.0 );
-   EXPECT_EQ( xVector[ 1 ], 0.0 );
-   EXPECT_EQ( xVector[ 2 ], 0.0 );
-   EXPECT_EQ( xVector[ 3 ], 0.25 );
+    test_RowsReduction< MatrixType >();
-template< typename Matrix >
-void test_SaveAndLoad( const char* filename )
+TYPED_TEST( MatrixTest, saveAndLoadTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
+    using MatrixType = typename TestFixture::MatrixType;
-   /*
-    * Sets up the following 4x4 sparse matrix:
-    *
-    *    /  1  2  3  0 \
-    *    |  0  4  0  5 |
-    *    |  6  7  8  0 |
-    *    \  0  9 10 11 /
-    */
-   const IndexType m_rows = 4;
-   const IndexType m_cols = 4;
-   Matrix savedMatrix( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
-   savedMatrix.setCompressedRowLengths( rowLengths );
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-      savedMatrix.setElement( 0, i, value++ );
-   savedMatrix.setElement( 1, 1, value++ );
-   savedMatrix.setElement( 1, 3, value++ );      // 1st row
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-      savedMatrix.setElement( 2, i, value++ );
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-      savedMatrix.setElement( 3, i, value++ );
-   ASSERT_NO_THROW( savedMatrix.save( filename ) );
-   Matrix loadedMatrix;
-   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
-   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
-   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
-   EXPECT_EQ( std::remove( filename ), 0 );
+    test_SaveAndLoad< MatrixType >( saveAndLoadFileName );
-template< typename Matrix >
-void test_Print()
+TYPED_TEST( MatrixTest, printTest )
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   /*
-    * Sets up the following 5x4 sparse matrix:
-    *
-    *    /  1  2  3  0 \
-    *    |  0  0  0  4 |
-    *    |  5  6  7  0 |
-    *    |  0  8  9 10 |
-    *    \  0  0 11 12 /
-    */
-   const IndexType m_rows = 5;
-   const IndexType m_cols = 4;
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
-   m.setCompressedRowLengths( rowLengths );
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-      m.setElement( 0, i, value++ );
-   m.setElement( 1, 3, value++ );                // 1st row
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-      m.setElement( 2, i, value++ );
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-      m.setElement( 3, i, value++ );
-   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-      m.setElement( 4, i, value++ );
-   std::stringstream printed;
-   std::stringstream couted;
-   //change the underlying buffer and save the old buffer
-   auto old_buf = std::cout.rdbuf(printed.rdbuf());
-   m.print( std::cout ); //all the std::cout goes to ss
-   std::cout.rdbuf(old_buf); //reset
-   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
-             "Row: 1 ->  Col:3->4\t\n"
-             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
-             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
-             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+    using MatrixType = typename TestFixture::MatrixType;
-   EXPECT_EQ( printed.str(), couted.str() );
+    test_Print< MatrixType >();
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d8856547247adacff6923af9bd13c2d767c59e12
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -0,0 +1,1468 @@
+                          SparseMatrixTest.hpp -  description
+                             -------------------
+    begin                : Nov 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+#include <sstream>
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+template< typename MatrixHostFloat, typename MatrixHostInt >   // placeholder test helper; neither template parameter is referenced in the body
+void host_test_GetType()
+   bool testRan = false;   // never set to true anywhere in this function
+   EXPECT_TRUE( testRan );   // therefore records a failure on every run - presumably intentional, to keep the missing test visible
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+template< typename MatrixCudaFloat, typename MatrixCudaInt >   // placeholder test helper; neither template parameter is referenced in the body
+void cuda_test_GetType()
+   bool testRan = false;   // never set to true anywhere in this function
+   EXPECT_TRUE( testRan );   // therefore records a failure on every run - presumably intentional, to keep the missing test visible
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+template< typename Matrix >   // Matrix: the sparse matrix type whose constructors are exercised
+void test_Constructors()   // builds matrices in four ways (dimensions, row capacities, list of triples, std::map) and checks the stored entries
+   using RealType = typename Matrix::RealType;   // not referenced in this function
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;   // not referenced in this function
+   Matrix m1( 5, 6 );   // case 1: ( rows, columns ), confirmed by the two checks below
+   EXPECT_EQ( m1.getRows(), 5 );
+   EXPECT_EQ( m1.getColumns(), 6 );
+   Matrix m2( {1, 2, 2, 2, 1 }, 5 );   // case 2: presumably ( per-row capacities, number of columns ) - TODO confirm against the Matrix constructor
+   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 };   // v1 receives the lengths read back from m2, v2 holds the expected lengths
+   m2.setElement( 0, 0, 1 );   // 0th row
+   m2.setElement( 1, 0, 1 );   // 1st row
+   m2.setElement( 1, 1, 1 );
+   m2.setElement( 2, 1, 1 );   // 2nd row
+   m2.setElement( 2, 2, 1 );
+   m2.setElement( 3, 2, 1 );   // 3rd row
+   m2.setElement( 3, 3, 1 );
+   m2.setElement( 4, 4, 1 );   // 4th row
+   EXPECT_EQ( m2.getElement( 0, 0 ), 1 );   // 0th row
+   EXPECT_EQ( m2.getElement( 1, 0 ), 1 );   // 1st row
+   EXPECT_EQ( m2.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m2.getElement( 2, 1 ), 1 );   // 2nd row
+   EXPECT_EQ( m2.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m2.getElement( 3, 2 ), 1 );   // 3rd row
+   EXPECT_EQ( m2.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m2.getElement( 4, 4 ), 1 );   // 4th row
+   m2.getCompressedRowLengths( v1 );   // read the row lengths of m2 into v1
+   EXPECT_EQ( v1, v2 );   // they must equal the list passed to the constructor of m2
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  0  0  0  0 |
+    *    |  0 11  0  0  0 |
+    *    \  0  0  0 12  0 /
+    */
+   Matrix m3( 6, 5, {   // case 3: ( rows, columns, list of { row, column, value } triples ), matching the drawing above
+      { 0, 0,  1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 },
+      { 1, 1,  4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 },
+      { 2, 2,  7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 },
+      { 3, 0, 10.0 },
+      { 4, 1, 11.0 },
+      { 5, 3, 12.0 } } );
+   // Check every position of m3, including the ones that must read back as zero
+   EXPECT_EQ( m3.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m3.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m3.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m3.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m3.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m3.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m3.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m3.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m3.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m3.getElement( 2, 4 ),  9 );
+   EXPECT_EQ( m3.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m3.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m3.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m3.getElement( 5, 4 ),  0 );
+   std::map< std::pair< int, int >, float > map;   // case 4 input: key = ( row, column ), mapped value = entry. NOTE(review): <map> is not in this file's visible include list - presumably included transitively; confirm
+   map[ { 0, 0 } ] = 1.0;
+   map[ { 0, 1 } ] = 2.0;
+   map[ { 0, 2 } ] = 3.0;
+   map[ { 1, 1 } ] = 4.0;
+   map[ { 1, 2 } ] = 5.0;
+   map[ { 1, 3 } ] = 6.0;
+   map[ { 2, 2 } ] = 7.0;
+   map[ { 2, 3 } ] = 8.0;
+   map[ { 2, 4 } ] = 9.0;
+   map[ { 3, 0 } ] = 10.0;
+   map[ { 4, 1 } ] = 11.0;
+   map[ { 5, 3 } ] = 12.0;
+   Matrix m4( 6, 5, map );   // same 6x5 matrix as m3, this time built from the map
+   // Check every position of m4; the expected values are identical to those of m3
+   EXPECT_EQ( m4.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m4.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m4.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m4.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m4.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m4.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m4.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m4.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m4.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m4.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m4.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m4.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m4.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m4.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m4.getElement( 2, 4 ),  9 );
+   EXPECT_EQ( m4.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m4.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m4.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m4.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m4.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m4.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m4.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m4.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m4.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m4.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m4.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m4.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m4.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m4.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m4.getElement( 5, 4 ),  0 );
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_SetDimensions()   // checks that setDimensions() on a default-constructed matrix is reflected by getRows()/getColumns()
+   using RealType = typename Matrix::RealType;   // not referenced in this function
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+   Matrix m;   // default-constructed; dimensions are assigned on the next line
+   m.setDimensions( rows, cols );
+   EXPECT_EQ( m.getRows(), 9 );   // literal repeats the value of rows
+   EXPECT_EQ( m.getColumns(), 8 );   // literal repeats the value of cols
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_SetCompressedRowLengths()   // sets per-row lengths, fills each row, then checks that getCompressedRowLengths() reports the same lengths
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );   // one entry per row, all initialised to 3
+   IndexType rowLength = 1;
+   for( IndexType i = 2; i < rows; i++ )   // rows 2..9 get lengths 1..8; rows 0 and 1 keep the initial 3
+      rowLengths.setElement( i, rowLength++ );
+   m.setCompressedRowLengths( rowLengths );
+   // Insert values into the rows: row r receives exactly as many entries (columns 0, 1, ...) as its length set above.
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m.setElement( 0, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )      // 1st row
+      m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 1; i++ )      // 2nd row
+      m.setElement( 2, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )      // 3rd row
+      m.setElement( 3, i, value++ );
+   for( IndexType i = 0; i < 3; i++ )      // 4th row
+      m.setElement( 4, i, value++ );
+   for( IndexType i = 0; i < 4; i++ )      // 5th row
+      m.setElement( 5, i, value++ );
+   for( IndexType i = 0; i < 5; i++ )      // 6th row
+      m.setElement( 6, i, value++ );
+   for( IndexType i = 0; i < 6; i++ )      // 7th row
+      m.setElement( 7, i, value++ );
+   for( IndexType i = 0; i < 7; i++ )      // 8th row
+      m.setElement( 8, i, value++ );
+   for( IndexType i = 0; i < 8; i++ )      // 9th row
+      m.setElement( 9, i, value++ );
+   rowLengths = 0;   // overwrite the input vector before reading back - presumably so stale values cannot make the check pass
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };   // the lengths assigned at the top of the function
+   EXPECT_EQ( rowLengths, correctRowLengths );
+template< typename Matrix1, typename Matrix2 >   // Matrix1: type of the matrix being resized; Matrix2: type of the matrix used as the template
+void test_SetLike()   // checks that m1.setLike( m2 ) gives m1 the row and column counts of m2
+   using RealType = typename Matrix1::RealType;   // not referenced in this function
+   using DeviceType = typename Matrix1::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix1::IndexType;
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+   Matrix1 m1( rows + 1, cols + 2 );   // starts with different dimensions than m2, so setLike() has to change them
+   Matrix2 m2( rows, cols );
+   m1.setLike( m2 );
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_GetNumberOfNonzeroMatrixElements()   // fills a 10x10 matrix with the values 1..41 and checks the reported nonzero count
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };   // number of entries per row in the drawing above
+   m.setCompressedRowLengths( rowLengths );
+   RealType value = 1;   // incremented after every insertion, so the entries are 1, 2, ..., 41
+   for( IndexType i = 0; i < 4; i++ )   // row 0: columns 0, 2, 4, 6
+      m.setElement( 0, 2 * i, value++ );
+   for( IndexType i = 0; i < 3; i++ )   // row 1: columns 0..2
+      m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 8; i++ )   // row 2: columns 0..7
+      m.setElement( 2, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )   // row 3: columns 0..1
+      m.setElement( 3, i, value++ );
+   for( IndexType i = 4; i < 8; i++ )   // rows 4..7: one entry each, in column 0
+      m.setElement( i, 0, value++ );
+   for( IndexType j = 8; j < rows; j++)   // rows 8 and 9: every column
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );   // 4 + 3 + 8 + 2 + 4 * 1 + 2 * 10 insertions, all with nonzero values
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_Reset()   // checks that reset() leaves the matrix with zero rows and zero columns
+   using RealType = typename Matrix::RealType;   // not referenced in this function
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 5x4 sparse matrix (no element is ever inserted):
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+   Matrix m( rows, cols );
+   m.reset();
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_GetRow()   // fills a 10x10 matrix row by row through matrixView.getRow() inside a ParallelFor, then verifies every position via m.getElement()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };   // number of entries per row in the drawing above
+   m.setCompressedRowLengths( rowLengths );
+   auto matrixView = m.getView();   // the lambda below writes through this view, not through m directly
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {   // captures matrixView and rows by value; called once per row index
+      auto row = matrixView.getRow( rowIdx );
+      RealType val;   // assigned inside the cases that need a running value; cases 4-7 do not read it
+      switch( rowIdx )
+      {
+         case 0:
+            val = 1;
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, val++ );   // arguments appear to be ( position within the row, column, value ) - TODO confirm against the row-view API
+            break;
+         case 1:
+            val = 5;
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 2:
+            val = 8;
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 3:
+            val = 16;
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 4:
+            row.setElement( 0, 0, 18 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 19 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 20 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 21 );
+            break;
+         case 8:
+            val = 22;
+            for( IndexType i = 0; i < rows; i++ )   // loop bound is rows although i is used as a column; equivalent here only because rows == cols == 10
+               row.setElement( i, i, val++ );
+            break;
+         case 9:
+            val = 32;
+            for( IndexType i = 0; i < rows; i++ )   // same remark as for case 8
+               row.setElement( i, i, val++ );
+            break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );   // presumably invokes f for every row index in [0, rows) on DeviceType - TODO confirm the range convention
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );   // from here on: all 100 positions are compared with the drawing above
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_SetElement()   // fills a 10x10 matrix through m.setElement() and verifies every position via m.getElement()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+   Matrix m;   // default-constructed, then reset and sized explicitly (the sibling tests pass the dimensions to the constructor instead)
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };   // number of entries per row in the drawing above
+   m.setCompressedRowLengths( rowLengths );
+   RealType value = 1;   // incremented after every insertion, so the entries are 1, 2, ..., 41
+   for( IndexType i = 0; i < 4; i++ )   // row 0: columns 0, 2, 4, 6
+      m.setElement( 0, 2 * i, value++ );
+   for( IndexType i = 0; i < 3; i++ )   // row 1: columns 0..2
+      m.setElement( 1, i, value++ );
+   for( IndexType i = 0; i < 8; i++ )   // row 2: columns 0..7
+      m.setElement( 2, i, value++ );
+   for( IndexType i = 0; i < 2; i++ )   // row 3: columns 0..1
+      m.setElement( 3, i, value++ );
+   for( IndexType i = 4; i < 8; i++ )   // rows 4..7: one entry each, in column 0
+      m.setElement( i, 0, value++ );
+   for( IndexType j = 8; j < rows; j++)   // rows 8 and 9: every column
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );   // from here on: all 100 positions are compared with the drawing above
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+template< typename Matrix >   // Matrix: the sparse matrix type under test
+void test_AddElement()   // builds a 6x5 matrix, verifies it, then calls addElement() on every stored entry and verifies the combined values
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;   // not referenced in this function
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  1  1  0  0 |
+    *    |  0 11  1  1  0 |
+    *    \  0  0  1 12  1 /
+    */
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+   Matrix m( rows, cols, {   // each triple is { row, column, value }, matching the drawing above
+      { 0, 0,  1 }, { 0, 1,  2 }, { 0, 2, 3 },
+                    { 1, 1,  4 }, { 1, 2, 5 }, { 1, 3,  6 },
+                                  { 2, 2, 7 }, { 2, 3,  8 }, { 2, 4, 9 },
+      { 3, 0, 10 }, { 3, 1,  1 }, { 3, 2, 1 },
+                    { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3,  1 },
+                                  { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } );
+   /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+   RealType value = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
+      m.setElement( 0, i, value++ );
+   for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
+      m.setElement( 1, i, value++ );
+   for( IndexType i = 2; i < cols; i++ )         // 2nd row
+      m.setElement( 2, i, value++ );
+   m.setElement( 3, 0, value++ );      // 3rd row
+   m.setElement( 4, 1, value++ );      // 4th row
+   m.setElement( 5, 3, value++ );      // 5th row*/   // end of disabled setup code; the matrix is built by the constructor call above instead
+   // Check the elements set by the constructor, including the positions that must read back as zero
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  1 );
+   // Add new values to the stored elements; per the expectations below each entry becomes 2.0 * old + added (e.g. (0,0): 2 * 1 + 1 = 3).
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9  0  0 \
+    *    |  0 12 15 18  0 |
+    *    |  0  0 21 24 27 |
+    *    | 30 13 14  0  0 |
+    *    |  0 35 16 17  0 |
+    *    \  0  0 18 41 20 /
+    */
+   RealType newValue = 1;   // incremented after every call, so the added values are 1, 2, ..., 18
+   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
+      m.addElement( 0, i, newValue++, 2.0 );
+   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
+      m.addElement( 1, i, newValue++, 2.0 );
+   for( IndexType i = 2; i < cols; i++ )             // 2nd row
+      m.addElement( 2, i, newValue++, 2.0 );
+   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
+      m.addElement( 3, i, newValue++, 2.0 );
+   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
+      m.addElement( 4, i, newValue++, 2.0 );
+   for( IndexType i = 2; i < cols; i++ )             // 5th row
+      m.addElement( 5, i, newValue++, 2.0 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );   // from here on: every position is compared with the second drawing above
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
+   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 20 );
+template< typename Matrix >
+void test_VectorProduct()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  2  0  3 |
+    *    |  0  4  0  0 |
+    *    \  0  0  5  0 /
+    */
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
+   Matrix m_1;
+   m_1.reset();
+   m_1.setDimensions( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 };
+   m_1.setCompressedRowLengths( rowLengths_1 );
+   RealType value_1 = 1;
+   m_1.setElement( 0, 0, value_1++ );      // 0th row
+   m_1.setElement( 1, 1, value_1++ );      // 1st row
+   m_1.setElement( 1, 3, value_1++ );
+   m_1.setElement( 2, 1, value_1++ );      // 2nd row
+   m_1.setElement( 3, 2, value_1++ );      // 3rd row
+   VectorType inVector_1;
+   inVector_1.setSize( m_cols_1 );
+   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+       inVector_1.setElement( i, 2 );
+   VectorType outVector_1;
+   outVector_1.setSize( m_rows_1 );
+   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+       outVector_1.setElement( j, 0 );
+   m_1.vectorProduct( inVector_1, outVector_1 );
+   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    \  0  8  0  0 /
+    */
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
+   m_2.setCompressedRowLengths( rowLengths_2 );
+   RealType value_2 = 1;
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m_2.setElement( 0, i, value_2++ );
+   m_2.setElement( 1, 3, value_2++ );      // 1st row
+   for( IndexType i = 0; i < 3; i++ )      // 2nd row
+      m_2.setElement( 2, i, value_2++ );
+   for( IndexType i = 1; i < 2; i++ )      // 3rd row
+      m_2.setElement( 3, i, value_2++ );
+   VectorType inVector_2;
+   inVector_2.setSize( m_cols_2 );
+   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+      inVector_2.setElement( i, 2 );
+   VectorType outVector_2;
+   outVector_2.setSize( m_rows_2 );
+   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+      outVector_2.setElement( j, 0 );
+   m_2.vectorProduct( inVector_2, outVector_2 );
+   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  5  6 |
+    *    |  7  8  9  0 |
+    *    \  0 10 11 12 /
+    */
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
+   m_3.setCompressedRowLengths( rowLengths_3 );
+   RealType value_3 = 1;
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, value_3++ );
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, value_3++ );      // 1st row
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, value_3++ );
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, value_3++ );
+   VectorType inVector_3;
+   inVector_3.setSize( m_cols_3 );
+   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+      inVector_3.setElement( i, 2 );
+   VectorType outVector_3;
+   outVector_3.setSize( m_rows_3 );
+   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+      outVector_3.setElement( j, 0 );
+   m_3.vectorProduct( inVector_3, outVector_3 );
+   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  0  4  0  0 \
+    *    |  0  5  6  7  8  0  0  0 |
+    *    |  9 10 11 12 13  0  0  0 |
+    *    |  0 14 15 16 17  0  0  0 |
+    *    |  0  0 18 19 20 21  0  0 |
+    *    |  0  0  0 22 23 24 25  0 |
+    *    | 26 27 28 29 30  0  0  0 |
+    *    \ 31 32 33 34 35  0  0  0 /
+    */
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
+   m_4.setCompressedRowLengths( rowLengths_4 );
+   RealType value_4 = 1;
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, value_4++ );
+   m_4.setElement( 0, 5, value_4++ );
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, value_4++ );
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, value_4++ );
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, value_4++ );
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, value_4++ );
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, value_4++ );
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, value_4++ );
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, value_4++ );
+   VectorType inVector_4;
+   inVector_4.setSize( m_cols_4 );
+   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+      inVector_4.setElement( i, 2 );
+   VectorType outVector_4;
+   outVector_4.setSize( m_rows_4 );
+   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+      outVector_4.setElement( j, 0 );
+   m_4.vectorProduct( inVector_4, outVector_4 );
+   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m_5.setCompressedRowLengths( rowLengths_5 );
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, value_5++ );
+   m_5.setElement( 0, 4, value_5++ );           // 0th row
+   m_5.setElement( 0, 5, value_5++ );
+   m_5.setElement( 1, 1, value_5++ );           // 1st row
+   m_5.setElement( 1, 3, value_5++ );
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, value_5++ );
+   m_5.setElement( 2, 4, value_5++ );           // 2nd row
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, value_5++ );
+   m_5.setElement( 4, 1, value_5++ );           // 4th row
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, value_5++ );
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, value_5++ );
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, value_5++ );
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
+   VectorType inVector_5;
+   inVector_5.setSize( m_cols_5 );
+   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+       inVector_5.setElement( i, 2 );
+   VectorType outVector_5;
+   outVector_5.setSize( m_rows_5 );
+   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+       outVector_5.setElement( j, 0 );
+   m_5.vectorProduct( inVector_5, outVector_5 );
+   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+template< typename Matrix >
+void test_RowsReduction()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Test of Matrix::allRowsReduction. Two reductions are run over the matrix
+    * below: the first counts the non-zero values of every row, the second sums
+    * the absolute values of every row and the largest of these sums is then
+    * compared with the expected value.
+    *
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    *
+    * The number to the right of each row is the count of its non-zero entries;
+    * the same counts are passed to the matrix as rowsCapacities below.
+    */
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };   // one entry per row
+   m.setCompressedRowLengths( rowsCapacities );
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+   m.setElement( 0, 4, value++ );       // 0th row
+   m.setElement( 0, 5, value++ );
+   m.setElement( 1, 1, value++ );       // 1st row
+   m.setElement( 1, 3, value++ );
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+   m.setElement( 2, 4, value++ );       // 2nd row
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, value++ );
+   m.setElement( 4, 1, value++ );       // 4th row
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, value++ );
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, value++ );
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+       m.setElement( 7, i, value++ );
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows 0-6 (row 7 already holds 36 in its last column)
+      m.setElement( i, 7, 1);
+   ////
+   // Compute number of non-zero elements in rows and compare it with the capacities set above.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );   // contributes 1 for a non-zero value, 0 otherwise
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;   // running count for the row being reduced
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;   // store the row's result through the view captured by value
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );   // last argument 0: presumably the initial value of the reduction - TODO confirm
+   EXPECT_EQ( rowsCapacities, rowLengths );   // every row was filled with exactly as many non-zeros as its capacity
+   m.getCompressedRowLengths( rowLengths );   // the matrix's own query is expected to report the same counts
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   ////
+   // Compute max norm (the largest row sum of absolute values)
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );   // converted to IndexType by the lambda's declared return type
+   };   // NOTE(review): a fractional RealType value would be truncated by that conversion; the test data are whole numbers
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;   // despite the lambda's name this accumulates a sum; the maximum is taken by TNL::max below
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;   // store the row sum through the view captured by value
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );   // largest of the per-row sums
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+template< typename Matrix >
+void test_PerformSORIteration()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Test of Matrix::performSORIteration. The method is called once for each
+    * row, in the order 0, 1, 2, 3, with b = x = (1, 1, 1, 1) and omega = 1, and
+    * the whole x vector is checked after every call.
+    *
+    * The expected values are consistent with the update
+    *    x[ i ] = ( b[ i ] - sum_{ j != i } a[ i ][ j ] * x[ j ] ) / a[ i ][ i ]
+    * for omega = 1. The implementation of performSORIteration is not visible
+    * from this test, so treat the formula as an assumption to be confirmed.
+    *
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );   // presumably m_rows entries, each equal to 3 - TODO confirm
+   m.setCompressedRowLengths( rowLengths );
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };   // right-hand side
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };   // current iterate, modified by the calls below
+   IndexType row = 0;
+   RealType omega = 1;
+   m.performSORIteration( bVector, row++, xVector, omega);   // row 0: only x[ 0 ] is expected to change, ( 1 - 1*1 ) / 4 = 0
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+   m.performSORIteration( bVector, row++, xVector, omega);   // row 1: ( 1 - 1*0 - 1*1 ) / 4 = 0, using the already updated x[ 0 ]
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+   m.performSORIteration( bVector, row++, xVector, omega);   // row 2: ( 1 - 1*0 - 1*1 ) / 4 = 0
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+   m.performSORIteration( bVector, row++, xVector, omega);   // row 3: ( 1 - 1*0 ) / 4 = 0.25
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Round-trip test of Matrix::save and Matrix::load. The matrix below is
+    * written to `filename`, read back into a default-constructed matrix, and
+    * all 16 positions of both matrices are compared element by element. The
+    * saved matrix is then checked against the literal values of the diagram
+    * and the file is deleted.
+    *
+    * filename - path of the temporary file; it is created by save() and
+    *            removed at the end of the test.
+    *
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    \  0  9 10 11 /
+    */
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );   // presumably m_rows entries, each equal to 3 - TODO confirm
+   savedMatrix.setCompressedRowLengths( rowLengths );
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      savedMatrix.setElement( 0, i, value++ );
+   savedMatrix.setElement( 1, 1, value++ );
+   savedMatrix.setElement( 1, 3, value++ );      // 1st row
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      savedMatrix.setElement( 2, i, value++ );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      savedMatrix.setElement( 3, i, value++ );
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );   // fatal assertion: on failure the rest of the test, including the file removal, is skipped
+   Matrix loadedMatrix;   // default-constructed, so whatever it holds after load() comes from the file
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );   // saved vs. loaded, all 16 positions
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );   // literal checks are made on savedMatrix; loadedMatrix is covered only through the comparisons above
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( std::remove( filename ), 0 );   // delete the temporary file; std::remove returns 0 on success
+template< typename Matrix >
+void test_Print()
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Test of Matrix::print. The output for the matrix below is captured by
+    * temporarily redirecting std::cout into a string stream and is compared
+    * with a hand-written expected text of the form
+    *    Row: <row> ->  Col:<column>-><value> ...
+    * in which only the non-zero entries of each row are listed. The expected
+    * text mixes literal tab characters with \t escapes; both denote the same
+    * character.
+    *
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    |  0  8  9 10 |
+    *    \  0  0 11 12 /
+    */
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );   // presumably m_rows entries, each equal to 3 - TODO confirm
+   m.setCompressedRowLengths( rowLengths );
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+   m.setElement( 1, 3, value++ );                // 1st row
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      m.setElement( 3, i, value++ );
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+      m.setElement( 4, i, value++ );
+   std::stringstream printed;   // receives what m.print() writes
+   std::stringstream couted;    // holds the expected text
+   // swap std::cout's buffer for the one owned by `printed` and keep the old buffer for restoring
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   m.print( std::cout ); // everything written to std::cout now ends up in `printed`
+   std::cout.rdbuf(old_buf); // restore the original buffer; NOTE(review): not reached if print() throws
+   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
+             "Row: 1 ->  Col:3->4\t\n"
+             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
+             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
+             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+   EXPECT_EQ( printed.str(), couted.str() );   // exact text comparison, whitespace included
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index a72d548f5bdc98c6fbd7920507b4c1978f58ef00..2898a46f1a30a097cefd0a1e0487e68382e1b150 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -8,26 +8,17 @@
 /* See Copyright Notice in tnl/Copyright */
+#include <iostream>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Matrices/SparseMatrix.h>
-#include "SparseMatrixTest.h"
-#include <iostream>
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
-// test fixture for typed tests
-template< typename Matrix >
-class CSRMatrixTest : public ::testing::Test
-   using CSRMatrixType = Matrix;
+const char* saveAndLoadFileName = "test_SparseMatrixTest_CSR_segments";
 // types for which MatrixTest is instantiated
-using CSRMatrixTypes = ::testing::Types
+using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
@@ -49,93 +40,7 @@ using CSRMatrixTypes = ::testing::Types
-TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
-TYPED_TEST( CSRMatrixTest, Constructors )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_Constructors< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, setDimensionsTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_SetDimensions< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_SetCompressedRowLengths< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, setLikeTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_SetLike< CSRMatrixType, CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, resetTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_Reset< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, getRowTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_GetRow< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, setElementTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_SetElement< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, addElementTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_AddElement< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, vectorProductTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_VectorProduct< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, rowsReduction )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_RowsReduction< CSRMatrixType >();
-TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" );
-TYPED_TEST( CSRMatrixTest, printTest )
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-    test_Print< CSRMatrixType >();
+#include "SparseMatrixTest.h"
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..667b06513fc9df997e61b3a0491ecbf17d72f31c
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp
@@ -0,0 +1,11 @@
+                          SparseMatrixTest_ChunkedEllpack.cpp -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include "SparseMatrixTest_ChunkedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6310042471caa4c2a6852ac42a875da9423b08f8
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu
@@ -0,0 +1,11 @@
+                          SparseMatrixTest_ChunkedEllpack.cu -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include "SparseMatrixTest_ChunkedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7c35dac6e43755e83699e627c91d2c798a73b69
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
@@ -0,0 +1,57 @@
+                          SparseMatrixTest_ChunkedEllpack.h -  description
+                             -------------------
+    begin                : Mar 21, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include <iostream>
+#include <TNL/Containers/Segments/ChunkedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+const char* saveAndLoadFileName = "test_SparseMatrixTest_ChunkedEllpack_segments";   // presumably read by SparseMatrixTest.h, which is included after the type list - TODO confirm
+// Row-major format is used for the host system (segments alias with the last template argument fixed to true)
+template< typename Device, typename Index, typename IndexAllocator >
+using RowMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, true >;
+// Column-major format is used for GPUs (segments alias with the last template argument fixed to false)
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, false >;
+// types for which MatrixTest is instantiated; the first host entry uses the column-major alias, all remaining host entries are row-major
+using MatrixTypes = ::testing::Types
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,   // column-major layout exercised on the host as well
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,   // CUDA entries, compiled only with HAVE_CUDA, all column-major
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+#include "SparseMatrixTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 2a890e694f4ca90edc7aa3b98fba56f666c2097d..aed4d1ac16b7ebf6c2356d10305df08a5ff92340 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -8,37 +8,27 @@
 /* See Copyright Notice in tnl/Copyright */
+#include <iostream>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Matrices/SparseMatrix.h>
-#include "SparseMatrixTest.h"
-#include <iostream>
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
-// test fixture for typed tests
-template< typename Matrix >
-class EllpackMatrixTest : public ::testing::Test
-   using EllpackMatrixType = Matrix;
+const char* saveAndLoadFileName = "test_SparseMatrixTest_Ellpack_segments";
 // Row-major format is used for the host system
 template< typename Device, typename Index, typename IndexAlocator >
 using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >;
 // Column-major format is used for GPUs
 template< typename Device, typename Index, typename IndexAllocator >
 using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >;
 // types for which MatrixTest is instantiated
-using EllpackMatrixTypes = ::testing::Types
+using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
@@ -60,99 +50,6 @@ using EllpackMatrixTypes = ::testing::Types
-TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes);
-TYPED_TEST( EllpackMatrixTest, Constructors )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_Constructors< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, setDimensionsTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_SetDimensions< EllpackMatrixType >();
-//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest )
-////    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-////    test_SetCompressedRowLengths< EllpackMatrixType >();
-//    bool testRan = false;
-//    EXPECT_TRUE( testRan );
-//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-//    std::cout << "      This test is dependent on the input format. \n";
-//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
-//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
-TYPED_TEST( EllpackMatrixTest, setLikeTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_SetLike< EllpackMatrixType, EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, resetTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_Reset< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, getRowTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_GetRow< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, setElementTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_SetElement< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, addElementTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_AddElement< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, vectorProductTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_VectorProduct< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, rowsReduction )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_RowsReduction< EllpackMatrixType >();
-TYPED_TEST( EllpackMatrixTest, saveAndLoadTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" );
-TYPED_TEST( EllpackMatrixTest, printTest )
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-    test_Print< EllpackMatrixType >();
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
index 17b48dcf461e4b8e99a9d1d9172ded8301b20038..96115a25c800e6742ba83359e0c3b818ba78b41b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
@@ -8,38 +8,28 @@
 /* See Copyright Notice in tnl/Copyright */
+#include <iostream>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
-#include "SparseMatrixTest.h"
-#include <iostream>
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
-// test fixture for typed tests
-template< typename Matrix >
-class SlicedEllpackMatrixTest : public ::testing::Test
-   using SlicedEllpackMatrixType = Matrix;
+const char* saveAndLoadFileName = "test_SparseMatrixTest_SlicedEllpack_segments";
 // Row-major format is used for the host system
 template< typename Device, typename Index, typename IndexAllocator >
 using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >;
 // Column-major format is used for GPUs
 template< typename Device, typename Index, typename IndexAllocator >
 using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >;
 // types for which MatrixTest is instantiated
-using SlicedEllpackMatrixTypes = ::testing::Types
+using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
     TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
@@ -61,92 +51,6 @@ using SlicedEllpackMatrixTypes = ::testing::Types
-TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
-TYPED_TEST( SlicedEllpackMatrixTest, Constructors )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_Constructors< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_SetDimensions< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
-   using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-   test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, resetTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_Reset< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, getRowTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_GetRow< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, setElementTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_SetElement< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, addElementTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_AddElement< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_VectorProduct< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_RowsReduction< SlicedEllpackMatrixType >();
-TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" );
-TYPED_TEST( SlicedEllpackMatrixTest, printTest )
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-    test_Print< SlicedEllpackMatrixType >();
 #include "../main.h"