diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 624a197299464f1cb2740697b04e289217ac636b..de46d4c8278f952d375fc303683b9be6a6b44373 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,8 +45,6 @@ stages:
           fi
         - export CTEST_OUTPUT_ON_FAILURE=1
         - export CTEST_PARALLEL_LEVEL=4
-        # enforce (more or less) warning-free builds
-        - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
@@ -64,6 +62,7 @@ stages:
                 -DWITH_EXAMPLES=${WITH_EXAMPLES}
                 -DWITH_TOOLS=${WITH_TOOLS}
                 -DWITH_PYTHON=${WITH_PYTHON}
+                -DWITH_CI_FLAGS=yes
         # "install" implies the "all" target
         - ninja ${NINJAFLAGS} install
         - if [[ ${WITH_TESTS} == "yes" ]]; then
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68252ba6a74c45adc73dbd4e18b0afb5c81e5a67..ea0d8a30b6144e74950d2b1d9fde302a50e53e28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,11 @@ set( CMAKE_SHARED_LINKER_FLAGS "" )
 set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-rdynamic" )
 set( CMAKE_SHARED_LINKER_FLAGS_RELEASE "" )
 
+if( ${WITH_CI_FLAGS} )
+   # enforce (more or less) warning-free builds
+   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" )
+endif()
+
 # set additional Debug/Release options using generator expressions
 # (that way we can exclude some options for specific targets, see https://stackoverflow.com/a/59734798 for details)
 add_compile_options(
diff --git a/build b/build
index 67492f02c3c5c90647dceb9c607e9e0f0891d108..ee74fa87bfb98b0db0bd87976afcfe8ad9f741a8 100755
--- a/build
+++ b/build
@@ -31,6 +31,7 @@ WITH_EXAMPLES="yes"
 WITH_PYTHON="yes"
 WITH_TOOLS="yes"
 WITH_BENCHMARKS="yes"
+WITH_CI_FLAGS="no"
 
 for option in "$@"
 do
@@ -62,6 +63,7 @@ do
         --with-tools=*                   ) WITH_TOOLS="${option#*=}" ;;
         --with-benchmarks=*              ) WITH_BENCHMARKS="${option#*=}" ;;
         --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
+        --with-ci-flags=*                ) WITH_CI_FLAGS="${option#*=}" ;;
         *                                )
            echo "Unknown option ${option}. Use --help for more information."
            exit 1 ;;
@@ -142,6 +144,7 @@ cmake_command=(
          -DWITH_TOOLS=${WITH_TOOLS}
          -DWITH_PYTHON=${WITH_PYTHON}
          -DWITH_BENCHMARKS=${WITH_BENCHMARKS}
+         -DWITH_CI_FLAGS=${WITH_CI_FLAGS}
          -DDCMTK_DIR=${DCMTK_DIR}
 )
 
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index d515d52d73d513d87b86d4b743d8b0e27b20e0ca..6685b9f766237dbcbe2d2a017eb1d8dac9a87135 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -15,10 +15,10 @@
 #include "../Benchmarks.h"
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 
 namespace TNL {
 namespace Benchmarks {
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index aa4b29424d2b93b323017e5501231a57874ccfa4..b90b11088ef8f73511adb2ba5c58448e93e2bcf8 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -30,7 +30,7 @@
 #include "../Benchmarks.h"
 #include "ordering.h"
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 4aabf39cd4bae98bc411fcc95feef56672b039ca..ea39d80b7b8011f53e7187e53a62e8446bdb8b82 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -55,7 +55,7 @@
    #define HAVE_CUSOLVER
 #endif
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
@@ -365,8 +365,7 @@ struct LinearSolversBenchmark
       // load the matrix
       if( ends_with( file_matrix, ".mtx" ) ) {
          Matrices::MatrixReader< MatrixType > reader;
-         if( ! reader.readMtxFile( file_matrix, *matrixPointer ) )
-            return false;
+         reader.readMtxFile( file_matrix, *matrixPointer );
       }
       else {
          matrixPointer->load( file_matrix );
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index d29b680bc8835a5615073fbf61e3cc13a74dfca2..dad2cdd8dc309b38844c1c5eb27232f75d6092dc 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -36,8 +36,6 @@
 #include "Euler.h"
 #include "Merson.h"
 
-#include <TNL/Matrices/SlicedEllpack.h>
-
 using namespace TNL;
 using namespace TNL::Benchmarks;
 using namespace TNL::Pointers;
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 6bfee5ffef7702219493d124d72da5d15fc8c54b..02a26854dc06a5ad0de8230a6f351d24a2526975 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -17,14 +17,21 @@
 #include "../Benchmarks.h"
 
 #include <TNL/Pointers/DevicePointer.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-#include <TNL/Matrices/ChunkedEllpack.h>
-#include <TNL/Matrices/AdEllpack.h>
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #include <TNL/Matrices/MatrixReader.h>
+#include <TNL/Matrices/MatrixInfo.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
 using namespace TNL::Matrices;
 
 #include "cusparseCSRMatrix.h"
@@ -36,6 +43,22 @@ namespace Benchmarks {
 template< typename Real, typename Device, typename Index >
 using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
 
+// Segments based sparse matrix aliases
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;
+
 // Get the name (with extension) of input matrix file
 std::string getMatrixFileName( const String& InputFileName )
 {
@@ -85,7 +108,7 @@ void printMatrixInfo( const Matrix& matrix,
 template< typename Real,
           template< typename, typename, typename > class Matrix,
           template< typename, typename, typename, typename > class Vector = Containers::Vector >
-bool
+void
 benchmarkSpMV( Benchmark& benchmark,
                const String& inputFileName,
                bool verboseMR )
@@ -98,19 +121,7 @@ benchmarkSpMV( Benchmark& benchmark,
     CSR_DeviceMatrix CSRdeviceMatrix;
 
     // Read the matrix for CSR, to set up cuSPARSE
-    try
-      {
-         if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
-         {
-             throw std::bad_alloc();
-             return false;
-         }
-      }
-      catch( std::bad_alloc& e )
-      {
-          e.what();
-          return false;
-      }
+    MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR );
 
 #ifdef HAVE_CUDA
     // cuSPARSE handle setup
@@ -140,19 +151,7 @@ benchmarkSpMV( Benchmark& benchmark,
     CudaVector deviceVector, deviceVector2;
 
     // Load the format
-    try
-      {
-         if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
-         {
-             throw std::bad_alloc();
-             return false;
-         }
-      }
-      catch( std::bad_alloc& e )
-      {
-          e.what();
-          return false;
-      }
+    MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
 
 
     // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
@@ -162,7 +161,7 @@ benchmarkSpMV( Benchmark& benchmark,
           { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
           { "rows", convertToString( hostMatrix.getRows() ) },
           { "columns", convertToString( hostMatrix.getColumns() ) },
-          { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) }
+          { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) }
        } ));
 
     hostVector.setSize( hostMatrix.getColumns() );
@@ -244,7 +243,7 @@ benchmarkSpMV( Benchmark& benchmark,
 
     resultcuSPARSEDeviceVector2 = deviceVector2;
 
-    // Difference between GPU (curent format) and GPU-cuSPARSE results
+    // Difference between GPU (current format) and GPU-cuSPARSE results
     //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
     Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) );
     //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
@@ -279,26 +278,35 @@ benchmarkSpMV( Benchmark& benchmark,
  #endif
 
     std::cout << std::endl;
-    return true;
 }
 
 template< typename Real = double,
           typename Index = int >
-bool
+void
 benchmarkSpmvSynthetic( Benchmark& benchmark,
                         const String& inputFileName,
                         bool verboseMR )
 {
-   bool result = true;
-   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR );
+   
+   benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR );
+   
+   benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
+
+   ////
+   // Segments based sparse matrices
+   
+   
+   //
 
    // AdEllpack is broken
-//   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
-   return result;
+   // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
+   //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 }
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 77c079c4c562408a63182ca910c9ebfc5d68e111..65416f0432085f744ee66a80efb497242ef0db81 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -96,11 +96,11 @@ main( int argc, char* argv[] )
    //           * The guide on what parameters to use prints twice.
    // FIXME: When ./tnl-benchmark-spmv-dbg is called with '--help':
    //           * The guide on what parameter to use print once. 
-   //               But then it CRASHES due to segfault:
-//                    The program attempts to get unknown parameter openmp-enabled
-//                    Aborting the program.
-//                    terminate called after throwing an instance of 'int'
-//                    [1]    17156 abort (core dumped)  ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help
+   //              But then it CRASHES due to segfault:
+   //              The program attempts to get unknown parameter openmp-enabled
+   //              Aborting the program.
+   //              terminate called after throwing an instance of 'int'
+   //      [1]    17156 abort (core dumped)  ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help
 
    if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) {
       conf_desc.printUsage( argv[ 0 ] );
diff --git a/src/Python/pytnl/tnl/SparseMatrix.cpp b/src/Python/pytnl/tnl/SparseMatrix.cpp
index e6584998313fa9e3c1314c6f67b99267815cf0a8..fe3ba5aca7f2dbb96817760620cf3369b3b43140 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.cpp
+++ b/src/Python/pytnl/tnl/SparseMatrix.cpp
@@ -3,9 +3,9 @@
 
 #include "SparseMatrix.h"
 
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 
 using CSR_host = TNL::Matrices::CSR< double, TNL::Devices::Host, int >;
 using CSR_cuda = TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >;
diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 1a32bd257f52a14f07579abe3671df1978cfc4d2..e4064e1a411364084e2422d1a18f8814a1271f0b 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -5,7 +5,7 @@ namespace py = pybind11;
 
 #include <TNL/String.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 template< typename Matrix >
 struct SpecificExports
@@ -51,7 +51,7 @@ void export_Matrix( py::module & m, const char* name )
 
     using VectorType = TNL::Containers::Vector< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >;
 
-    void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVector&) const = &Matrix::getCompressedRowLengths;
+    void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVectorView) const = &Matrix::getCompressedRowLengths;
 
     auto matrix = py::class_< Matrix, TNL::Object >( m, name )
         .def(py::init<>())
@@ -72,7 +72,7 @@ void export_Matrix( py::module & m, const char* name )
         .def("getCompressedRowLengths", _getCompressedRowLengths)
         // TODO: export for more types
         .def("setLike",                 &Matrix::template setLike< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >)
-        .def("getNumberOfMatrixElements", &Matrix::getNumberOfMatrixElements)
+        .def("getAllocatedElementsCount", &Matrix::getAllocatedElementsCount)
         .def("getNumberOfNonzeroMatrixElements", &Matrix::getNumberOfNonzeroMatrixElements)
         .def("reset",                   &Matrix::reset)
         .def("getRows",                 &Matrix::getRows)
diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b5c5b4e013c4a7283e1fe827fe59638dbbd0128
--- /dev/null
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -0,0 +1,94 @@
+/***************************************************************************
+                          AtomicOperations.h  -  description
+                             -------------------
+    begin                : Feb 26, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber, Jakub Klinkovsky
+
+#pragma once
+
+#ifdef HAVE_CUDA
+#include <cuda.h>
+#endif
+#include <TNL/Devices/Sequential.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+namespace TNL {
+namespace Algorithms {
+
+template< typename Device >
+struct AtomicOperations{};
+
+template<>
+struct AtomicOperations< Devices::Host >
+{
+   template< typename Value >
+   static void add( Value& v, const Value& a )
+   {
+#pragma omp atomic update
+      v += a;
+   }
+};
+
+template<>
+struct AtomicOperations< Devices::Cuda >
+{
+   template< typename Value >
+   __cuda_callable__
+   static void add( Value& v, const Value& a )
+   {
+#ifdef HAVE_CUDA
+      atomicAdd( &v, a );
+#endif // HAVE_CUDA
+   }
+
+#ifdef HAVE_CUDA
+   __device__
+   static void add( double& v, const double& a )
+   {
+#if __CUDA_ARCH__ < 600
+      unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+      unsigned long long int old = *v_as_ull, assumed;
+
+      do
+      {
+         assumed = old;
+         old = atomicCAS( v_as_ull,
+                          assumed,
+                          __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ;
+
+      // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+      }
+      while( assumed != old );
+#else // __CUDA_ARCH__ < 600
+      atomicAdd( &v, a );
+#endif //__CUDA_ARCH__ < 600
+   }
+#else // HAVE_CUDA
+   static void add( double& v, const double& a ){}
+#endif // HAVE_CUDA
+
+   __cuda_callable__
+   static void add( long int& v, const long int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+
+   __cuda_callable__
+   static void add( short int& v, const short int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+};
+} //namespace Algorithms
+} //namespace TNL
diff --git a/src/TNL/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h
index 79a2019594922eee640672edb12d8ef6e9132dd0..97912b2343907e504db133ae545f5f420f18f0e3 100644
--- a/src/TNL/Algorithms/CudaScanKernel.h
+++ b/src/TNL/Algorithms/CudaScanKernel.h
@@ -277,7 +277,7 @@ struct CudaScanKernelLauncher
               elementsInBlock,
               &deviceInput[ gridOffset ],
               &deviceOutput[ gridOffset ],
-              &blockSums[ gridIdx * maxGridSize() ] );
+              &blockSums.getData()[ gridIdx * maxGridSize() ] );
       }
 
       // synchronize the null-stream after all grids
diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp
index 7b6d31ece513144c5b0cec9947a232b940fb5e30..fc1f2f1e5449a12c56b525c92854705e7bd003e6 100644
--- a/src/TNL/Algorithms/Scan.hpp
+++ b/src/TNL/Algorithms/Scan.hpp
@@ -225,8 +225,8 @@ perform( Vector& v,
 
    CudaScanKernelLauncher< Type, RealType, IndexType >::perform(
       end - begin,
-      &v[ begin ],  // input
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // input
+      &v.getData()[ begin ],  // output
       reduction,
       zero );
 #else
@@ -251,8 +251,8 @@ performFirstPhase( Vector& v,
 
    return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase(
       end - begin,
-      &v[ begin ],  // input
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // input
+      &v.getData()[ begin ],  // output
       reduction,
       zero );
 #else
@@ -279,7 +279,7 @@ performSecondPhase( Vector& v,
 
    CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase(
       end - begin,
-      &v[ begin ],  // output
+      &v.getData()[ begin ],  // output
       blockShifts.getData(),
       reduction,
       shift );
diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h
index dc4ba7cf439d4ea25576d75fd1605c4400f6fa23..630abd09fd04272144c47e4bbfeff17f2d6442a4 100644
--- a/src/TNL/Assert.h
+++ b/src/TNL/Assert.h
@@ -394,7 +394,7 @@ TNL_IMPL_CMP_HELPER_( GT, > );
    pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \
          msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \
          #val1, #val2, val1, val2 )
-   
+
 // Main definitions of the TNL_ASSERT_* macros
 // unary
 #define TNL_ASSERT_TRUE( val, msg ) \
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index 117cb32ae4a84afb803f84dbf34d54d1948c0f2b..1166245110501aa6c8c66c5a81448ab82895b54d 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -137,6 +137,15 @@ class Array
        */
       explicit Array( const IndexType& size, const AllocatorType& allocator = AllocatorType() );
 
+      /**
+       * \brief Constructs an array with given size and value.
+       *
+       * \param size The number of array elements to be allocated.
+       * \param value The value all elements will be set to.
+       * \param allocator The allocator to be associated with this array.
+       */
+      explicit Array( const IndexType& size, const Value& value, const AllocatorType& allocator = AllocatorType() );
+
       /**
        * \brief Constructs an array with given size and copies data from given
        * pointer.
@@ -446,7 +455,10 @@ class Array
        * to the memory space where the array was allocated. For example, if the
        * array was allocated in the host memory, it can be called only from
        * host, and if the array was allocated in the device memory, it can be
-       * called only from device kernels.
+       * called only from device kernels. If NDEBUG is not defined, assertions
+       * inside this methods performs runtime checks for cross-device memory
+       * accesses which lead to segmentation fault. If you need to do just a
+       * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
        * \return Reference to the \e i-th element.
@@ -460,7 +472,10 @@ class Array
        * to the memory space where the array was allocated. For example, if the
        * array was allocated in the host memory, it can be called only from
        * host, and if the array was allocated in the device memory, it can be
-       * called only from device kernels.
+       * called only from device kernels. If NDEBUG is not defined, assertions
+       * inside this methods performs runtime checks for cross-device memory
+       * accesses which lead to segmentation fault. If you need to do just a
+       * pointer arithmetics use \e getData instead.
        *
        * \param i The index of the element to be accessed.
        * \return Constant reference to the \e i-th element.
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 24e3f8b43a024c8c8c3b87213a31886c595caceb..ab81db7aabc214ab66751905446849179aa1975a 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -62,6 +62,18 @@ Array( const IndexType& size, const AllocatorType& allocator )
    this->setSize( size );
 }
 
+template< typename Value,
+          typename Device,
+          typename Index,
+          typename Allocator >
+Array< Value, Device, Index, Allocator >::
+Array( const IndexType& size, const Value& value, const AllocatorType& allocator )
+: allocator( allocator )
+{
+   this->setSize( size );
+   *this = value;
+}
+
 template< typename Value,
           typename Device,
           typename Index,
@@ -509,6 +521,11 @@ Value&
 Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i )
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
@@ -523,6 +540,11 @@ const Value&
 Array< Value, Device, Index, Allocator >::
 operator[]( const Index& i ) const
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index c06ad56dcc113541167b9d012ca4caf836a4f5c5..5b9766ffd2b07d00318edff0c3c24080a020d3b6 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -338,7 +338,10 @@ public:
     * to the memory space where the data was allocated. For example, if the
     * data was allocated in the host memory, it can be called only from
     * host, and if the data was allocated in the device memory, it can be
-    * called only from device kernels.
+    * called only from device kernels. If NDEBUG is not defined, assertions
+    * inside this methods performs runtime checks for cross-device memory
+    * accesses which lead to segmentation fault. If you need to do just a
+    * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
     * \return Reference to the \e i-th element.
@@ -353,7 +356,10 @@ public:
     * to the memory space where the data was allocated. For example, if the
     * data was allocated in the host memory, it can be called only from
     * host, and if the data was allocated in the device memory, it can be
-    * called only from device kernels.
+    * called only from device kernels. If NDEBUG is not defined, assertions
+    * inside this methods performs runtime checks for cross-device memory
+    * accesses which lead to segmentation fault. If you need to do just a
+    * pointer arithmetics use \e getData instead.
     *
     * \param i The index of the element to be accessed.
     * \return Constant reference to the \e i-th element.
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index c3c39bc10be8dd846331d1086fc1d22b42b8c6c7..e36182cd54acfc17075a73944cb72df7ed3eb042 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -239,6 +239,11 @@ __cuda_callable__
 Value& ArrayView< Value, Device, Index >::
 operator[]( Index i )
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return data[ i ];
@@ -252,6 +257,11 @@ const
 Value& ArrayView< Value, Device, Index >::
 operator[]( Index i ) const
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." );
+#else
+   TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." );
+#endif
    TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
    TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return data[ i ];
diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..89cad0c6af3d80d9b9b78d336f4dfb95ff69cfc6
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -0,0 +1,133 @@
+/***************************************************************************
+                          CSR.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class CSR
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
+      using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = CSRView< Device_, Index_ >;
+      using ViewType = CSRView< Device, Index >;
+      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
+      using SegmentViewType = SegmentView< IndexType, true >;
+
+      CSR();
+
+      CSR( const SegmentsSizes& sizes );
+
+      CSR( const CSR& segments );
+
+      CSR( const CSR&& segments );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Returns number of elements managed by all segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      CSR& operator=( const CSR& rhsSegments ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_ >
+      CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      OffsetsHolder offsets;
+};
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/CSR.hpp>
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9a948b04e2dac7311c6eab9f4149cf779256c59f
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -0,0 +1,291 @@
+/***************************************************************************
+                          CSR.hpp -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/details/CSR.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
+CSR()
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
+CSR( const SegmentsSizes& segmentsSizes )
+{
+   this->setSegmentsSizes( segmentsSizes );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
+CSR( const CSR& csr ) : offsets( csr.offsets )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+CSR< Device, Index, IndexAllocator >::
+CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
+{
+
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+String
+CSR< Device, Index, IndexAllocator >::
+getSerializationType()
+{
+   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+String
+CSR< Device, Index, IndexAllocator >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename SizesHolder >
+void
+CSR< Device, Index, IndexAllocator >::
+setSegmentsSizes( const SizesHolder& sizes )
+{
+   details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+typename CSR< Device, Index, IndexAllocator >::ViewType
+CSR< Device, Index, IndexAllocator >::
+getView()
+{
+   return ViewType( this->offsets.getView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+typename CSR< Device, Index, IndexAllocator >::ConstViewType
+CSR< Device, Index, IndexAllocator >::
+getConstView() const
+{
+   return ConstViewType( this->offsets.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+CSR< Device, Index, IndexAllocator >::
+getSegmentsCount() const
+{
+   return this->offsets.getSize() - 1;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+CSR< Device, Index, IndexAllocator >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+CSR< Device, Index, IndexAllocator >::
+getSize() const
+{
+   return this->getStorageSize();
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+CSR< Device, Index, IndexAllocator >::
+getStorageSize() const
+{
+   return details::CSR< Device, Index >::getStorageSize( this->offsets );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+CSR< Device, Index, IndexAllocator >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx ] + localIdx;
+#else
+      return offsets.getElement( segmentIdx ) + localIdx;
+#endif
+   }
+   return offsets[ segmentIdx ] + localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+void
+CSR< Device, Index, IndexAllocator >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+CSR< Device, Index, IndexAllocator >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename Function, typename... Args >
+void
+CSR< Device, Index, IndexAllocator >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      const IndexType begin = offsetsView[ segmentIdx ];
+      const IndexType end = offsetsView[ segmentIdx + 1 ];
+      IndexType localIdx( 0 );
+      for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+         if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+            break;
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator>
+   template< typename Function, typename... Args >
+void
+CSR< Device, Index, IndexAllocator >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSR< Device, Index, IndexAllocator >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      RealType aux( zero );
+      bool compute( true );
+      IndexType localIdx( 0 );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSR< Device, Index, IndexAllocator >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+   template< typename Device_, typename Index_, typename IndexAllocator_ >
+CSR< Device, Index, IndexAllocator >&
+CSR< Device, Index, IndexAllocator >::
+operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )
+{
+   this->offsets = source.offsets;
+   return *this;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+void
+CSR< Device, Index, IndexAllocator >::
+save( File& file ) const
+{
+   file << this->offsets;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator >
+void
+CSR< Device, Index, IndexAllocator >::
+load( File& file )
+{
+   file >> this->offsets;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
new file mode 100644
index 0000000000000000000000000000000000000000..f7cf815d04eb87e411cd213e0b0ca659c60bbb6c
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -0,0 +1,132 @@
+/***************************************************************************
+                          CSRView.h -  description
+                             -------------------
+    begin                : Dec 11, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index >
+class CSRView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >;
+      using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType;
+      using ViewType = CSRView;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = CSRView< Device_, Index_ >;
+      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
+      using SegmentViewType = SegmentView< IndexType >;
+
+      __cuda_callable__
+      CSRView();
+
+      __cuda_callable__
+      CSRView( const OffsetsView& offsets );
+
+      __cuda_callable__
+      CSRView( const OffsetsView&& offsets );
+
+      __cuda_callable__
+      CSRView( const CSRView& csr_view );
+
+      __cuda_callable__
+      CSRView( const CSRView&& csr_view );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Returns number of elements managed by all segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      CSRView& operator=( const CSRView& view );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      OffsetsView offsets;
+};
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/CSRView.hpp>
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..02be7f099f1fd9446200d8e10340c5a6bdc6afed
--- /dev/null
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -0,0 +1,264 @@
+/***************************************************************************
+                          CSRView.hpp -  description
+                             -------------------
+    begin                : Dec 11, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/details/CSR.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView()
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const OffsetsView&& offsets_view )
+   : offsets( offsets_view )
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const CSRView& csr_view )
+   : offsets( csr_view.offsets )
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+CSRView< Device, Index >::
+CSRView( const CSRView&& csr_view )
+   : offsets( std::move( csr_view.offsets ) )
+{
+}
+
+template< typename Device,
+          typename Index >
+String
+CSRView< Device, Index >::
+getSerializationType()
+{
+   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index >
+String
+CSRView< Device, Index >::
+getSegmentsType()
+{
+   return "CSR";
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+typename CSRView< Device, Index >::ViewType
+CSRView< Device, Index >::
+getView()
+{
+   return ViewType( this->offsets );
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+typename CSRView< Device, Index >::ConstViewType
+CSRView< Device, Index >::
+getConstView() const
+{
+   return ConstViewType( this->offsets.getConstView() );
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSegmentsCount() const
+{
+   return this->offsets.getSize() - 1;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getSize() const
+{
+   return this->getStorageSize();
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getStorageSize() const
+{
+   return details::CSR< Device, Index >::getStorageSize( this->offsets );
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+Index
+CSRView< Device, Index >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( ! std::is_same< DeviceType, Devices::Host >::value )
+   {
+#ifdef __CUDA_ARCH__
+      return offsets[ segmentIdx ] + localIdx;
+#else
+      return offsets.getElement( segmentIdx ) + localIdx;
+#endif
+   }
+   return offsets[ segmentIdx ] + localIdx;
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+void
+CSRView< Device, Index >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index >
+__cuda_callable__
+auto
+CSRView< Device, Index >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+CSRView< Device, Index >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto offsetsView = this->offsets;
+   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      const IndexType begin = offsetsView[ segmentIdx ];
+      const IndexType end = offsetsView[ segmentIdx + 1 ];
+      IndexType localIdx( 0 );
+      bool compute( true );
+      for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+         f( segmentIdx, localIdx++, globalIdx, compute, args... );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Function, typename... Args >
+void
+CSRView< Device, Index >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSRView< Device, Index >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   const auto offsetsView = this->offsets.getConstView();
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = offsetsView[ i ];
+      const IndexType end = offsetsView[ i + 1 ];
+      RealType aux( zero );
+      IndexType localIdx( 0 );
+      bool compute( true );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+template< typename Device,
+          typename Index >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+CSRView< Device, Index >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index >
+CSRView< Device, Index >&
+CSRView< Device, Index >::
+operator=( const CSRView& view )
+{
+   this->offsets.bind( view.offsets );
+   return *this;
+}
+
+template< typename Device,
+          typename Index >
+void
+CSRView< Device, Index >::
+save( File& file ) const
+{
+   file << this->offsets;
+}
+
+template< typename Device,
+          typename Index >
+void
+CSRView< Device, Index >::
+load( File& file )
+{
+   file >> this->offsets;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1188a854e952f15f1f2449cc33c9535ccfac10a
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+                          Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int Alignment = 32 >
+class Ellpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >;
+      using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >;
+      //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
+
+      Ellpack();
+
+      Ellpack( const SegmentsSizes& sizes );
+
+      Ellpack( const IndexType segmentsCount, const IndexType segmentSize );
+
+      Ellpack( const Ellpack& segments );
+
+      Ellpack( const Ellpack&& segments );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      ViewType getView();
+
+      //ConstViewType getConstView() const;
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize );
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      Ellpack& operator=( const Ellpack& source ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ >
+      Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c59c5529eada436df075bd130ddaa16f0ef20ea
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -0,0 +1,412 @@
+/***************************************************************************
+                          Ellpack.hpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+Ellpack()
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+Ellpack( const SegmentsSizes& segmentsSizes )
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+   this->setSegmentsSizes( segmentsSizes );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+Ellpack( const IndexType segmentsCount, const IndexType segmentSize )
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+   this->setSegmentsSizes( segmentsCount, segmentSize );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+Ellpack( const Ellpack& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+Ellpack( const Ellpack&& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+String
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSerializationType()
+{
+   return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+String
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ViewType
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getView()
+{
+   return ViewType( segmentSize, size, alignedSize );
+}
+
+/*template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ConstViewType
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getConstView() const
+{
+   return ConstViewType( segmentSize, size, alignedSize );
+}*/
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename SizesHolder >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+setSegmentsSizes( const SizesHolder& sizes )
+{
+   this->segmentSize = max( sizes );
+   this->size = sizes.getSize();
+   if( RowMajorOrder )
+      this->alignedSize = this->size;
+   else
+      this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize )
+{
+   this->segmentSize = segmentSize;
+   this->size = segmentsCount;
+   if( RowMajorOrder )
+      this->alignedSize = this->size;
+   else
+      this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+}
+
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentsCount() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getStorageSize() const
+{
+   return this->alignedSize * this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( RowMajorOrder )
+      return segmentIdx * this->segmentSize + localIdx;
+   else
+      return segmentIdx + this->alignedSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+auto
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   if( RowMajorOrder )
+      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+   else
+      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx,  args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx;
+         const IndexType end = storageSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         bool compute( true );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         RealType aux( zero );
+         bool compute( true );
+         for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++  )
+            reduction( aux, fetch( i, localIdx, j, compute, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ >
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >&
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source )
+{
+   this->segmentSize = source.segmentSize;
+   this->size = source.size;
+   this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment();
+   return *this;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+save( File& file ) const
+{
+   file.save( &segmentSize );
+   file.save( &size );
+   file.save( &alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int Alignment >
+void
+Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
+load( File& file )
+{
+   file.load( &segmentSize );
+   file.load( &size );
+   file.load( &alignedSize );
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
new file mode 100644
index 0000000000000000000000000000000000000000..10a89bd7bafd7de62a8e5f37f567478a3d4af1ee
--- /dev/null
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -0,0 +1,126 @@
+/***************************************************************************
+                          EllpackView.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int Alignment = 32 >
+class EllpackView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >;
+      using ViewType = EllpackView;
+      using ConstViewType = EllpackView< Device, std::add_const_t< Index > >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
+
+      __cuda_callable__
+      EllpackView();
+
+      __cuda_callable__
+      EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize );
+
+      __cuda_callable__
+      EllpackView( const EllpackView& ellpackView );
+
+      __cuda_callable__
+      EllpackView( const EllpackView&& ellpackView );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      EllpackView& operator=( const EllpackView& view );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/EllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..84086f380bfb12ac86113f82a76e40db1fbabdef
--- /dev/null
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -0,0 +1,349 @@
+/***************************************************************************
+                          EllpackView.hpp -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView()
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize )
+   : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( const EllpackView& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+EllpackView( const EllpackView&& ellpack )
+   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+String
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSerializationType()
+{
+   return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+String
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentsType()
+{
+   return "Ellpack";
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getView()
+{
+   return ViewType( segmentSize, size, alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getConstView() const
+{
+   return ConstViewType( segmentSize, size, alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentsCount() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getStorageSize() const
+{
+   return this->alignedSize * this->segmentSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+Index
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   if( RowMajorOrder )
+      return segmentIdx * this->segmentSize + localIdx;
+   else
+      return segmentIdx + this->alignedSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+__cuda_callable__
+auto
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   if( RowMajorOrder )
+      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+   else
+      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType begin = segmentIdx;
+         const IndexType end = storageSize;
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += alignedSize )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   if( RowMajorOrder )
+   {
+      const IndexType segmentSize = this->segmentSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j++  )
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      const IndexType storageSize = this->getStorageSize();
+      const IndexType alignedSize = this->alignedSize;
+      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+         const IndexType begin = i;
+         const IndexType end = storageSize;
+         RealType aux( zero );
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j += alignedSize  )
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
+         keeper( i, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+EllpackView< Device, Index, RowMajorOrder, Alignment >&
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view )
+{
+   this->segmentSize = view.segmentSize;
+   this->size = view.size;
+   this->alignedSize = view.alignedSize;
+   return *this;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+save( File& file ) const
+{
+   file.save( &segmentSize );
+   file.save( &size );
+   file.save( &alignedSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int Alignment >
+void
+EllpackView< Device, Index, RowMajorOrder, Alignment >::
+load( File& file )
+{
+   file.load( &segmentSize );
+   file.load( &size );
+   file.load( &alignedSize );
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h
new file mode 100644
index 0000000000000000000000000000000000000000..eeb3f9d244cc2c4126bb8d92c824b5ca7ffc9a67
--- /dev/null
+++ b/src/TNL/Containers/Segments/SegmentView.h
@@ -0,0 +1,89 @@
+/***************************************************************************
+                          SegmentView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Index,
+          bool RowMajorOrder = false >
+class SegmentView;
+
+template< typename Index >
+class SegmentView< Index, false >
+{
+   public:
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      SegmentView( const IndexType offset,
+                   const IndexType size,
+                   const IndexType step )
+      : segmentOffset( offset ), segmentSize( size ), step( step ){};
+
+      __cuda_callable__
+      SegmentView( const SegmentView& view )
+      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){};
+
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex * step;
+      };
+
+      protected:
+         
+         IndexType segmentOffset, segmentSize, step;
+};
+
+template< typename Index >
+class SegmentView< Index, true >
+{
+   public:
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      SegmentView( const IndexType offset,
+                   const IndexType size,
+                   const IndexType step = 1 ) // For compatibility with previous specialization
+      : segmentOffset( offset ), segmentSize( size ){};
+
+      __cuda_callable__
+      IndexType getSize() const
+      {
+         return this->segmentSize;
+      };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const IndexType localIndex ) const
+      {
+         TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." );
+         return segmentOffset + localIndex;
+      };
+
+      protected:
+         
+         IndexType segmentOffset, segmentSize;
+};
+
+      } //namespace Segments
+   } //namespace Containers
+} //namespace TNL
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..2027f1d78a96d685806f2715257098a38a624800
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+                          SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >;
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
+
+      SlicedEllpack();
+
+      SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      SlicedEllpack( const SlicedEllpack& segments );
+
+      SlicedEllpack( const SlicedEllpack&& segments );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      SlicedEllpack& operator=( const SlicedEllpack& source ) = default;
+
+      template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+      SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsHolder sliceOffsets, sliceSegmentSizes;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpack.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ba1276e3eaea3fdf39261c99e7376c6122d4f8b
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -0,0 +1,460 @@
+/***************************************************************************
+                          SlicedEllpack.hpp -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+SlicedEllpack()
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+   this->setSegmentsSizes( sizes );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+SlicedEllpack( const SlicedEllpack& slicedEllpack )
+   : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
+     segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpack.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+SlicedEllpack( const SlicedEllpack&& slicedEllpack )
+   : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ),
+     segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpack.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSerializationType()
+{
+   return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentsType()
+{
+   return ViewType::getSegmentsType();
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getView()
+{
+   return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ConstViewType
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getConstView() const
+{
+   return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename SizesHolder >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+setSegmentsSizes( const SizesHolder& sizes )
+{
+   this->segmentsCount = sizes.getSize();
+   const IndexType slicesCount = roundUpDivision( this->segmentsCount, getSliceSize() );
+   this->sliceOffsets.setSize( slicesCount + 1 );
+   this->sliceOffsets = 0;
+   this->sliceSegmentSizes.setSize( slicesCount );
+   Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack;
+   ellpack.setSegmentsSizes( slicesCount, SliceSize );
+
+   const IndexType _size = sizes.getSize();
+   const auto sizes_view = sizes.getConstView();
+   auto slices_view = this->sliceOffsets.getView();
+   auto slice_segment_size_view = this->sliceSegmentSizes.getView();
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+      if( globalIdx < _size )
+         return sizes_view[ globalIdx ];
+      return 0;
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) {
+      aux = TNL::max( aux, i );
+   };
+   auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) mutable {
+      slices_view[ i ] = res * SliceSize;
+      slice_segment_size_view[ i ] = res;
+   };
+   ellpack.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+   this->sliceOffsets.template scan< Algorithms::ScanType::Exclusive >();
+   this->size = sum( sizes );
+   this->alignedSize = this->sliceOffsets.getElement( slicesCount );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentsCount() const
+{
+   return this->segmentsCount;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   const Index sliceIdx = segmentIdx / SliceSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return this->sliceSegmentSizes[ sliceIdx ];
+   else
+   {
+#ifdef __CUDA_ARCH__
+   return this->sliceSegmentSizes[ sliceIdx ];
+#else
+   return this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+   return this->alignedSize;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   IndexType sliceOffset, segmentSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+   }
+   else
+   {
+#ifdef __CUDA__ARCH__
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+      sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+      segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+   if( RowMajorOrder )
+      return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
+   else
+      return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+auto
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ];
+   const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+
+   if( RowMajorOrder )
+      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
+   else
+      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         bool compute( true );
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         RealType aux( zero );
+         bool compute( true );
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ >
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >&
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source )
+{
+   this->size = source.size;
+   this->alignedSize = source.alignedSize;
+   this->segmentsCount = source.segmentsCount;
+   this->sliceOffsets = source.sliceOffsets;
+   this->sliceSegmentSizes = source.sliceSegmentSizes;
+   return *this;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+save( File& file ) const
+{
+   file.save( &size );
+   file.save( &alignedSize );
+   file.save( &segmentsCount );
+   file << this->sliceOffsets;
+   file << this->sliceSegmentSizes;
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+load( File& file )
+{
+   file.load( &size );
+   file.load( &alignedSize );
+   file.load( &segmentsCount );
+   file >> this->sliceOffsets;
+   file >> this->sliceSegmentSizes;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e2e55bbc3ef040c9b15f42a41b7e0a4bc14f7d7
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -0,0 +1,130 @@
+/***************************************************************************
+                          SlicedEllpackView.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/Segments/SegmentView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpackView
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      template< typename Device_, typename Index_ >
+      using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >;
+      using ViewType = SlicedEllpackView;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >;
+      using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
+
+      __cuda_callable__
+      SlicedEllpackView();
+
+      __cuda_callable__
+      SlicedEllpackView( IndexType size,
+                         IndexType alignedSize,
+                         IndexType segmentsCount,
+                         OffsetsView&& sliceOffsets,
+                         OffsetsView&& sliceSegmentSizes );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView& slicedEllpackView );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView );
+
+      static String getSerializationType();
+
+      static String getSegmentsType();
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      __cuda_callable__
+      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      SlicedEllpackView& operator=( const SlicedEllpackView& view );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsView sliceOffsets, sliceSegmentSizes;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c4e03aada22f877f1b6d0e14498193de8df9cdbd
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -0,0 +1,408 @@
+/***************************************************************************
+                          SlicedEllpackView.hpp -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+
+#include "SlicedEllpackView.h"
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView()
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView(  IndexType size,
+                    IndexType alignedSize,
+                    IndexType segmentsCount,
+                    OffsetsView&& sliceOffsets,
+                    OffsetsView&& sliceSegmentSizes )
+   : size( size ), alignedSize( alignedSize ), segmentsCount( segmentsCount ),
+     sliceOffsets( std::forward< OffsetsView >( sliceOffsets ) ), sliceSegmentSizes( std::forward< OffsetsView >( sliceSegmentSizes ) )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView& slicedEllpackView )
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView )
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSerializationType()
+{
+   return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsType()
+{
+   return "SlicedEllpack";
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getView()
+{
+   return ViewType( size, alignedSize, segmentsCount, sliceOffsets, sliceSegmentSizes );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getConstView() const
+{
+   return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsCount() const
+{
+   return this->segmentsCount;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   const Index sliceIdx = segmentIdx / SliceSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return this->sliceSegmentSizes[ sliceIdx ];
+   else
+   {
+#ifdef __CUDA_ARCH__
+   return this->sliceSegmentSizes[ sliceIdx ];
+#else
+   return this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+   return this->alignedSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   IndexType sliceOffset, segmentSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+   }
+   else
+   {
+#ifdef __CUDA_ARCH__
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+      sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+      segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+   if( RowMajorOrder )
+      return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
+   else
+      return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+auto
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ];
+   const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+
+   if( RowMajorOrder )
+      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
+   else
+      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize )
+            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
+         RealType aux( zero );
+         IndexType localIdx( 0 );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >&
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& view )
+{
+   this->size = view.size;
+   this->alignedSize = view.alignedSize;
+   this->segmentsCount = view.segmentsCount;
+   this->sliceOffsets.bind( view.sliceOffsets );
+   this->sliceSegmentSizes.bind( view.sliceSegmentSizes );
+   return *this;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+save( File& file ) const
+{
+   file.save( &size );
+   file.save( &alignedSize );
+   file.save( &segmentsCount );
+   file << this->sliceOffsets;
+   file << this->sliceSegmentSizes;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+load( File& file )
+{
+   file.load( &size );
+   file.load( &alignedSize );
+   file.load( &segmentsCount );
+   file >> this->sliceOffsets;
+   file >> this->sliceSegmentSizes;
+}
+
+      } // namespace Segments
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..38f097669150b7e3f929bdeab3beb1af03ce3e7d
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/CSR.h
@@ -0,0 +1,112 @@
+/***************************************************************************
+                          CSR.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+         namespace details {
+
+template< typename Device,
+          typename Index >
+class CSR
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+
+      template< typename SizesHolder, typename CSROffsets >
+      static void setSegmentsSizes( const SizesHolder& sizes, CSROffsets& offsets )
+      {
+         offsets.setSize( sizes.getSize() + 1 );
+         auto view = offsets.getView( 0, sizes.getSize() );
+         view = sizes;
+         offsets.setElement( sizes.getSize(), 0 );
+         offsets.template scan< Algorithms::ScanType::Exclusive >();
+      }
+
+      template< typename CSROffsets >
+      __cuda_callable__
+      static IndexType getSegmentsCount( const CSROffsets& offsets )
+      {
+         return offsets.getSize() - 1;
+      }
+
+      /***
+       * \brief Returns size of the segment number \r segmentIdx
+       */
+      template< typename CSROffsets >
+      __cuda_callable__
+      static IndexType getSegmentSize( const CSROffsets& offsets, const IndexType segmentIdx )
+      {
+         if( ! std::is_same< DeviceType, Devices::Host >::value )
+         {
+#ifdef __CUDA_ARCH__
+            return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+#else
+            return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx );
+#endif
+         }
+         return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ];
+      }
+
+      /***
+       * \brief Returns number of elements that needs to be allocated.
+       */
+      template< typename CSROffsets >
+      __cuda_callable__
+      static IndexType getStorageSize( const CSROffsets& offsets )
+      {
+         if( ! std::is_same< DeviceType, Devices::Host >::value )
+         {
+#ifdef __CUDA_ARCH__
+            return offsets[ getSegmentsCount( offsets ) ];
+#else
+            return offsets.getElement( getSegmentsCount( offsets ) );
+#endif
+         }
+         return offsets[ getSegmentsCount( offsets ) ];
+      }
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+};
+         } // namespace details
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..ecfe63107325793717482b3710c9533a153c34c1
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/Ellpack.h
@@ -0,0 +1,105 @@
+/***************************************************************************
+                          Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int Alignment = 32 >
+class Ellpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      static constexpr int getAlignment() { return Alignment; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using SegmentsSizes = OffsetsHolder;
+
+      Ellpack();
+
+      Ellpack( const SegmentsSizes& sizes );
+
+      Ellpack( const IndexType segmentsCount, const IndexType segmentSize );
+
+      Ellpack( const Ellpack& segments );
+
+      Ellpack( const Ellpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize );
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f185bc469e1c1826348b5662735d6a2992fc087
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h
@@ -0,0 +1,104 @@
+/***************************************************************************
+                          SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+
+      SlicedEllpack();
+
+      SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      SlicedEllpack( const SlicedEllpack& segments );
+
+      SlicedEllpack( const SlicedEllpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSegmentsSizes( const SizesHolder& sizes );
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsHolder sliceOffsets, sliceSegmentSizes;
+};
+
+      } // namespace Segements
+   }  // namespace Conatiners
+} // namespace TNL
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index c469927234cd835bef7bcfe36599a47cb843b6cc..ada48ee0297438c717772433fb6a09972f2d49e8 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -10,214 +10,220 @@
 
 #pragma once
 
+#include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
+#include <TNL/Matrices/DenseMatrixRowView.h>
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/DenseRow.h>
-#include <TNL/Containers/Array.h>
+#include <TNL/Matrices/DenseMatrixView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class DenseDeviceDependentCode;
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Dense;
-
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef DenseRow< Real, Index > MatrixRow;
+      // TODO: remove this
+      using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+      using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Dense< _Real, _Device, _Index >;
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Dense< _Real, _Device, _Index >;
 
-   Dense();
-
-   static String getSerializationType();
+      Dense();
 
-   virtual String getSerializationTypeVirtual() const;
-
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
-
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Dense< Real2, Device2, Index2 >& matrix );
+      Dense( const IndexType rows, const IndexType columns );
 
-   /****
-    * This method is only for the compatibility with the sparse matrices.
-    */
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
-
-   /****
-    * Returns maximal number of the nonzero matrix elements that can be stored
-    * in a given row.
-    */
-   IndexType getRowLength( const IndexType row ) const;
-
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
-
-   IndexType getMaxRowLength() const;
-
-   IndexType getNumberOfMatrixElements() const;
-
-   IndexType getNumberOfNonzeroMatrixElements() const;
-
-   void reset();
-
-   void setValue( const RealType& v );
-
-   __cuda_callable__
-   Real& operator()( const IndexType row,
-                     const IndexType column );
-
-   __cuda_callable__
-   const Real& operator()( const IndexType row,
-                           const IndexType column ) const;
-
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
-
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
-
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      ViewType getView();
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      ConstViewType getConstView() const;
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
+      static String getSerializationType();
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
+      virtual String getSerializationTypeVirtual() const;
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
 
-   __cuda_callable__
-   const Real& getElementFast( const IndexType row,
-                               const IndexType column ) const;
+      /****
+       * This method is only for the compatibility with the sparse matrices.
+       */
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   Real getElement( const IndexType row,
-                    const IndexType column ) const;
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   /*void getRow( const IndexType row,
-                IndexType* columns,
-                RealType* values ) const;*/
+      IndexType getMaxRowLength() const;
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      IndexType getNumberOfMatrixElements() const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      void reset();
 
-   template< typename InVector, typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   template< typename Matrix >
-   void addMatrix( const Matrix& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
 
-   template< typename Matrix1, typename Matrix2, int tileDim = 32 >
-   void getMatrixProduct( const Matrix1& matrix1,
-                       const Matrix2& matrix2,
-                       const RealType& matrix1Multiplicator = 1.0,
-                       const RealType& matrix2Multiplicator = 1.0 );
 
-   template< typename Matrix, int tileDim = 32 >
-   void getTransposition( const Matrix& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      void setValue( const RealType& v );
 
-   template< typename Vector1, typename Vector2 >
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      __cuda_callable__
+      Real& operator()( const IndexType row,
+                        const IndexType column );
 
-   // copy assignment
-   Dense& operator=( const Dense& matrix );
-
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      __cuda_callable__
+      const Real& operator()( const IndexType row,
+                              const IndexType column ) const;
 
-   void save( const String& fileName ) const;
-
-   void load( const String& fileName );
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   void save( File& file ) const;
-
-   void load( File& file );
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   void print( std::ostream& str ) const;
+      Real getElement( const IndexType row,
+                       const IndexType column ) const;
 
-protected:
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Matrix >
+      void addMatrix( const Matrix& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Matrix1, typename Matrix2, int tileDim = 32 >
+      void getMatrixProduct( const Matrix1& matrix1,
+                          const Matrix2& matrix2,
+                          const RealType& matrix1Multiplicator = 1.0,
+                          const RealType& matrix2Multiplicator = 1.0 );
+
+      template< typename Matrix, int tileDim = 32 >
+      void getTransposition( const Matrix& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      /**
+       * \brief Assignment operator for exactly the same type of the dense matrix.
+       * 
+       * @param matrix
+       * @return 
+       */
+      Dense& operator=( const Dense& matrix );
+
+      /**
+       * \brief Assignment operator for other dense matrices.
+       * 
+       * @param matrix
+       * @return 
+       */
+      template< typename RHSReal, typename RHSDevice, typename RHSIndex,
+                 bool RHSRowMajorOrder, typename RHSRealAllocator >
+      Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix );
+
+      /**
+       * \brief Assignment operator for other (sparse) types of matrices.
+       * @param matrix
+       * @return 
+       */
+      template< typename RHSMatrix >
+      Dense& operator=( const RHSMatrix& matrix );
+
+      template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+      bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+      bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
+
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
+
+      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      friend class DenseDeviceDependentCode< DeviceType >;
+
+      SegmentsType segments;
 
-   typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class DenseDeviceDependentCode< DeviceType >;
+      ViewType view;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Dense_impl.h>
+#include <TNL/Matrices/Dense.hpp>
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense.hpp
similarity index 54%
rename from src/TNL/Matrices/Dense_impl.h
rename to src/TNL/Matrices/Dense.hpp
index 246bd09edb459e6df9749af9d1589f508c2c5806..346c26ed8a628d5737e3dce143340a42364e5a9a 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Dense_impl.h  -  description
+                          Dense.hpp  -  description
                              -------------------
     begin                : Nov 29, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -15,412 +15,400 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >::Dense()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense()
 {
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Dense( const IndexType rows, const IndexType columns )
 {
-   return String( "Matrices::Dense< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
+   this->setDimensions( rows, columns );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getView() -> ViewType
 {
-   return this->getSerializationType();
+   return ViewType( this->getRows(),
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->segments.getView() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                  const IndexType columns )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getConstView() const -> ConstViewType
 {
-   Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   this->values.setSize( rows * columns );
-   this->values.setValue( 0.0 );
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->segments.getConstView() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2,
-             typename Device2,
-             typename Index2 >
-void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationType()
 {
-   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   return ViewType::getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationTypeVirtual() const
 {
+   return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns )
 {
-   return this->getColumns();
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->segments.setSegmentsSizes( rows, columns );
+   this->values.setSize( rows * columns );
+   this->values = 0.0;
+   this->view = this->getView();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Matrix_ >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setLike( const Matrix_& matrix )
 {
-   return this->getColumns();
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
-   return this->getColumns();
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "" );
+   TNL_ASSERT_LE( max( rowLengths ), this->getColumns(), "" );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
 {
-   return this->getRows() * this->getColumns();
+   this->view.getCompressedRowLengths( rowLengths );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( const IndexType row ) const
 {
-   IndexType nonzeroElements( 0 );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      for( IndexType column = 0; column < this->getColumns(); column++ )
-         if( this->getElement( row, column ) != 0 )
-            nonzeroElements++;
-   return nonzeroElements;
+   return this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::reset()
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMaxRowLength() const
 {
-   Matrix< Real, Device, Index >::reset();
-   this->values.reset();
+   return this->getColumns();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::setValue( const Real& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfMatrixElements() const
 {
-   this->values.setValue( value );
+   return this->getRows() * this->getColumns();
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Real& Dense< Real, Device, Index >::operator()( const IndexType row,
-                                                const IndexType column )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   return this->values.operator[]( this->getElementIndex( row, column ) );
+   return this->view.getNumberOfNonzeroMatrixElements();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-const Real& Dense< Real, Device, Index >::operator()( const IndexType row,
-                                                      const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset()
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   return this->values.operator[]( this->getElementIndex( row, column ) );
+   Matrix< Real, Device, Index >::reset();
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Dense< Real, Device, Index >::setElementFast( const IndexType row,
-                                                            const IndexType column,
-                                                            const RealType& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value )
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   this->values.operator[]( this->getElementIndex( row, column ) ) = value;
-   return true;
+   this->view.setValue( value );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setElement( const IndexType row,
-                                               const IndexType column,
-                                               const RealType& value )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
+   return this->view.getRow( rowIdx );
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Dense< Real, Device, Index >::addElementFast( const IndexType row,
-                                                   const IndexType column,
-                                                   const RealType& value,
-                                                   const RealType& thisElementMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__ auto
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
-
-   const IndexType elementIndex = this->getElementIndex( row, column );
-   if( thisElementMultiplicator == 1.0 )
-      this->values.operator[]( elementIndex ) += value;
-   else
-      this->values.operator[]( elementIndex ) =
-         thisElementMultiplicator * this->values.operator[]( elementIndex ) + value;
-   return true;
+   return this->view.getRow( rowIdx );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addElement( const IndexType row,
-                                                        const IndexType column,
-                                                        const RealType& value,
-                                                        const RealType& thisElementMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
+                                                const IndexType column )
 {
-   const IndexType elementIndex = this->getElementIndex( row, column );
-   if( thisElementMultiplicator == 1.0 )
-      this->values.setElement( elementIndex,
-                               this->values.getElement( elementIndex ) + value );
-   else
-      this->values.setElement( elementIndex,
-                               thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
-   return true;
+   return this->view.operator()( row, column );
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
 __cuda_callable__
-bool Dense< Real, Device, Index >::setRowFast( const IndexType row,
-                                                        const IndexType* columns,
-                                                        const RealType* values,
-                                                        const IndexType elements )
+const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row,
+                                                      const IndexType column ) const
 {
-   TNL_ASSERT( elements <= this->getColumns(),
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->getColumns() );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElementFast( row, columns[ i ], values[ i ] );
-   return true;
+   return this->view.operator()( row, column );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::setRow( const IndexType row,
-                                                    const IndexType* columns,
-                                                    const RealType* values,
-                                                    const IndexType elements )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
 {
-   TNL_ASSERT( elements <= this->getColumns(),
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->getColumns() );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElement( row, columns[ i ], values[ i ] );
-   return true;
+   this->view.setElement( row, column, value );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Dense< Real, Device, Index >::addRowFast( const IndexType row,
-                                                        const IndexType* columns,
-                                                        const RealType* values,
-                                                        const IndexType elements,
-                                                        const RealType& thisRowMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElementFast( row, columns[ i ],
-                            thisRowMultiplicator * this->getElementFast( row, columns[ i ] ) + values[ i ] );
-   return true;
+   this->view.addElement( row, column, value, thisElementMultiplicator );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Dense< Real, Device, Index >::addRow( const IndexType row,
-                                                    const IndexType* columns,
-                                                    const RealType* values,
-                                                    const IndexType elements,
-                                                    const RealType& thisRowMultiplicator )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElement( const IndexType row,
+            const IndexType column ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   for( IndexType i = 0; i < elements; i++ )
-      this->setElement( row, columns[ i ],
-                        thisRowMultiplicator * this->getElement( row, columns[ i ] ) + values[ i ] );
-   return true;
+   return this->view.getElement( row, column );
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row,
-                                                          const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
-   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
-   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
-   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
 
-   return this->values.operator[]( this->getElementIndex( row, column ) );
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Dense< Real, Device, Index >::getElement( const IndexType row,
-                                                        const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
 {
-   return this->values.getElement( this->getElementIndex( row, column ) );
+   this->view.forRows( first, last, function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-void Dense< Real, Device, Index >::getRowFast( const IndexType row,
-                                                        IndexType* columns,
-                                                        RealType* values ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
 {
-   for( IndexType i = 0; i < this->getColumns(); i++ )
-   {
-      columns[ i ] = i;
-      values[ i ] = this->getElementFast( row, i );
-   }
+   this->view.forRows( first, last, function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
-getRow( const IndexType rowIndex )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function ) const
 {
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-const typename Dense< Real, Device, Index >::MatrixRow
-Dense< Real, Device, Index >::
-getRow( const IndexType rowIndex ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function )
 {
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ],
-                        this->columns,
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const IndexType row,
+typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
                                                                                    const Vector& vector ) const
 {
-   RealType sum( 0.0 );
-   for( IndexType column = 0; column < this->getColumns(); column++ )
-      sum += this->getElementFast( row, column ) * vector[ column ];
-   return sum;
+   return this->view.rowVectorProduct( row, vector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector,
-                                                           OutVector& outVector ) const
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
-
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   this->view.vectorProduct( inVector, outVector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix >
-void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Matrix& matrix,
                                               const RealType& matrixMultiplicator,
                                               const RealType& thisMatrixMultiplicator )
 {
@@ -440,6 +428,8 @@ void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix,
 #ifdef HAVE_CUDA
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix1,
           typename Matrix2,
           int tileDim,
@@ -538,9 +528,11 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix1, typename Matrix2, int tileDim >
-void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1,
                                                               const Matrix2& matrix2,
                                                               const RealType& matrix1Multiplicator,
                                                               const RealType& matrix2Multiplicator )
@@ -628,6 +620,8 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1,
 template< typename Real,
           typename Index,
           typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
           int tileDim,
           int tileRowBlockSize >
 __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
@@ -696,6 +690,8 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind
 
 template< typename Real,
           typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
           typename Matrix,
           int tileDim,
           int tileRowBlockSize >
@@ -776,9 +772,11 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Matrix, int tileDim >
-void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Matrix& matrix,
                                                               const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getColumns() == matrix.getRows() &&
@@ -787,7 +785,7 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
                     << "This matrix rows: " << this->getRows() << std::endl
                     << "That matrix columns: " << matrix.getColumns() << std::endl
                     << "That matrix rows: " << matrix.getRows() << std::endl );
- 
+
    if( std::is_same< Device, Devices::Host >::value )
    {
       const IndexType& rows = matrix.getRows();
@@ -867,9 +865,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
    template< typename Vector1, typename Vector2 >
-void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b,
                                                         const IndexType row,
                                                         Vector2& x,
                                                         const RealType& omega ) const
@@ -878,148 +878,282 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector1& b,
    for( IndexType i = 0; i < this->getColumns(); i++ )
    {
       if( i == row )
-         diagonalValue = this->getElementFast( row, row );
+         diagonalValue = this->getElement( row, row );
       else
-         sum += this->getElementFast( row, i ) * x[ i ];
+         sum += this->getElement( row, i ) * x[ i ];
    }
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
-
-// copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
 {
-   this->setLike( matrix );
+   setLike( matrix );
    this->values = matrix.values;
    return *this;
 }
 
-// cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Dense< Real, Device, Index >&
-Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename RHSReal, typename RHSDevice, typename RHSIndex,
+             bool RHSRowMajorOrder, typename RHSRealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
-                  "unknown device" );
+   using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
 
    this->setLike( matrix );
+   if( RowMajorOrder == RHSRowMajorOrder )
+   {
+      this->values = matrix.getValues();
+      return *this;
+   }
 
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
+   auto this_view = this->view;
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+         this_view( rowIdx, columnIdx ) = value;
+      };
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = matrix.getColumns();
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx;
+            matrixValuesBuffer_view[ bufferIdx ] = value;
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix.
+         auto this_view = this->view;
+         auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable {
+            IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx;
+            this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
+         };
+         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
+         baseRow += bufferRowsCount;
+      }
+   }
+   return *this;
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename RHSMatrix >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const RHSMatrix& matrix )
 {
-   Object::save( fileName );
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
+   auto values_view = this->values.getView();
+   RHSIndexType padding_index = matrix.getPaddingIndex();
+   this->values = 0.0;
+
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+         if( value != 0.0 && columnIdx != padding_index )
+            values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value;
+      };
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = max( rowLengths );
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = padding_index;
+         matrixColumnsBuffer_view = padding_index;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+            if( columnIndex != padding_index )
+            {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
+               matrixValuesBuffer_view[ bufferIdx ] = value;
+            }
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+         thisColumnsBuffer_view = matrixColumnsBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix
+         auto this_view = this->view;
+         auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable {
+            IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx;
+            IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ];
+            if( columnIdx != padding_index )
+               this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
+         };
+         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
+         baseRow += bufferRowsCount;
+      }
+   }
+   this->view = this->getView();
+   return *this;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( const String& fileName )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+bool
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const
 {
-   Object::load( fileName );
+   return( this->getRows() == matrix.getRows() &&
+           this->getColumns() == matrix.getColumns() &&
+           this->getValues() == matrix.getValues() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::save( File& file ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
+bool
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const
 {
-   Matrix< Real, Device, Index >::save( file );
+   return ! ( *this == matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
 {
-   Matrix< Real, Device, Index >::load( file );
+   this->view.save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Dense< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName )
 {
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = 0; column < this->getColumns(); column++ )
-         str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
-      str << std::endl;
-   }
+   Object::load( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Index Dense< Real, Device, Index >::getElementIndex( const IndexType row,
-                                                              const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
-   TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value ||
-          std::is_same< Device, Devices::Cuda >::value ), )
-   if( std::is_same< Device, Devices::Host >::value )
-      return row * this->columns + column;
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return column * this->rows + row;
-   return -1;
+   this->view.save( file );
 }
 
-template<>
-class DenseDeviceDependentCode< Devices::Host >
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
+   Matrix< Real, Device, Index >::load( file );
+   this->segments.load( file );
+   this->view = this->getView();
+}
 
-template<>
-class DenseDeviceDependentCode< Devices::Cuda >
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
 {
-   public:
-
-      typedef Devices::Cuda Device;
+   this->view.print( str );
+}
 
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Dense< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Index
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   return this->segments.getGlobalIndex( row, column );
+}
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h
new file mode 100644
index 0000000000000000000000000000000000000000..84c6b141cd7f7cdf25be8e550e573680b4cce902
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixRowView.h
@@ -0,0 +1,52 @@
+/***************************************************************************
+                          DenseMatrixRowView.h -  description
+                             -------------------
+    begin                : Jan 3, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView >
+class DenseMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using SegmentViewType = SegmentView;
+      using IndexType = typename SegmentViewType::IndexType;
+      using ValuesViewType = ValuesView;
+
+      __cuda_callable__
+      DenseMatrixRowView( const SegmentViewType& segmentView,
+                          const ValuesViewType& values );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType column ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType column );
+
+      __cuda_callable__
+      void setElement( const IndexType column,
+                       const RealType& value );
+   protected:
+
+      SegmentViewType segmentView;
+
+      ValuesViewType values;
+};
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/DenseMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/DenseMatrixRowView.hpp b/src/TNL/Matrices/DenseMatrixRowView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1962a4d9a8eabe80f28b2e21d1f0506792949225
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixRowView.hpp
@@ -0,0 +1,71 @@
+/***************************************************************************
+                          DenseMatrixRowView.hpp -  description
+                             -------------------
+    begin                : Jan 3, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/DenseMatrixRowView.h>
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__
+DenseMatrixRowView< SegmentView, ValuesView >::
+DenseMatrixRowView( const SegmentViewType& segmentView,
+                     const ValuesViewType& values )
+ : segmentView( segmentView ), values( values )
+{
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getSize() const -> IndexType
+{
+   return segmentView.getSize();
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getValue( const IndexType column ) const -> const RealType&
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   return values[ segmentView.getGlobalIndex( column ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getValue( const IndexType column ) -> RealType&
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   return values[ segmentView.getGlobalIndex( column ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ void 
+DenseMatrixRowView< SegmentView, ValuesView >::
+setElement( const IndexType column,
+            const RealType& value )
+{
+   TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
+   const IndexType globalIdx = segmentView.getGlobalIndex( column );
+   values[ globalIdx ] = value;
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..95a7c47698fc27f7fa760a64c0176a147ebe391c
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -0,0 +1,197 @@
+/***************************************************************************
+                          DenseMatrixView.h  -  description
+                             -------------------
+    begin                : Nov 29, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/DenseMatrixRowView.h>
+#include <TNL/Matrices/MatrixView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class DenseMatrixView : public MatrixView< Real, Device, Index >
+{
+   private:
+      // convenient template alias for controlling the selection of copy-assignment operator
+      template< typename Device2 >
+      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+
+      // friend class will be needed for templated assignment operators
+      //template< typename Real2, typename Device2, typename Index2 >
+      //friend class Dense;
+
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = Matrix< Real, Device, Index >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
+      using SegmentsViewType = typename SegmentsType::ViewType;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+      using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+
+
+      // TODO: remove this
+      using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+      using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = DenseMatrixView< _Real, _Device, _Index >;
+
+      __cuda_callable__
+      DenseMatrixView();
+
+      __cuda_callable__
+      DenseMatrixView( const IndexType rows,
+                       const IndexType columns,
+                       const ValuesViewType& values,
+                       const SegmentsViewType& segments );
+
+      __cuda_callable__
+      DenseMatrixView( const DenseMatrixView& m ) = default;
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      IndexType getNumberOfMatrixElements() const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+
+      void setValue( const RealType& v );
+
+      __cuda_callable__
+      Real& operator()( const IndexType row,
+                        const IndexType column );
+
+      __cuda_callable__
+      const Real& operator()( const IndexType row,
+                              const IndexType column ) const;
+
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      Real getElement( const IndexType row,
+                       const IndexType column ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Matrix >
+      void addMatrix( const Matrix& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Matrix1, typename Matrix2, int tileDim = 32 >
+      void getMatrixProduct( const Matrix1& matrix1,
+                          const Matrix2& matrix2,
+                          const RealType& matrix1Multiplicator = 1.0,
+                          const RealType& matrix2Multiplicator = 1.0 );
+
+      template< typename Matrix, int tileDim = 32 >
+      void getTransposition( const Matrix& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      DenseMatrixView& operator=( const DenseMatrixView& matrix );
+
+      void save( const String& fileName ) const;
+
+      void save( File& file ) const;
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
+
+      //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      //friend class DenseDeviceDependentCode< DeviceType >;
+
+      SegmentsViewType segments;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/DenseMatrixView.hpp>
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..01415ec21c2446a255db57aae7df04cbe5813ed8
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -0,0 +1,703 @@
+/***************************************************************************
+                          DenseMatrixView.hpp  -  description
+                             -------------------
+    begin                : Nov 29, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+DenseMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+DenseMatrixView( const IndexType rows,
+                 const IndexType columns,
+                 const ValuesViewType& values,
+                 const SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(),
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->columnIndexes.getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->getColumnsIndexes().getConstView(),
+                         this->segments.getConstView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
+{
+   return String( "Matrices::Dense< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const
+{
+   return this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const
+{
+   return this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const
+{
+   return this->getRows() * this->getColumns();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset()
+{
+   Matrix< Real, Device, Index >::reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value )
+{
+   this->values = value;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__ auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__ auto
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
+                                                const IndexType column )
+{
+   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
+   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
+
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row,
+                                                      const IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "Row index must be non-negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." );
+   TNL_ASSERT_GE( column, 0, "Column index must be non-negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." );
+
+   return this->values.operator[]( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
+{
+   this->values.setElement( this->getElementIndex( row, column ), value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   const IndexType elementIndex = this->getElementIndex( row, column );
+   if( thisElementMultiplicator == 1.0 )
+      this->values.setElement( elementIndex,
+                               this->values.getElement( elementIndex ) + value );
+   else
+      this->values.setElement( elementIndex,
+                               thisElementMultiplicator * this->values.getElement( elementIndex ) + value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Real
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row,
+            const IndexType column ) const
+{
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
+      function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute );
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute );
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+   RealType sum( 0.0 );
+   // TODO: Fix this
+   //for( IndexType column = 0; column < this->getColumns(); column++ )
+   //   sum += this->getElementFast( row, column ) * vector[ column ];
+   return sum;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename InVector,
+             typename OutVector >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType {
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Matrix >
+void
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const Matrix& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT( this->getColumns() == matrix.getColumns() &&
+              this->getRows() == matrix.getRows(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl
+                 << "That matrix columns: " << matrix.getColumns() << std::endl
+                 << "That matrix rows: " << matrix.getRows() << std::endl );
+
+   if( thisMatrixMultiplicator == 1.0 )
+      this->values += matrixMultiplicator * matrix.values;
+   else
+      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Matrix1, typename Matrix2, int tileDim >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1,
+                                                              const Matrix2& matrix2,
+                                                              const RealType& matrix1Multiplicator,
+                                                              const RealType& matrix2Multiplicator )
+{
+   TNL_ASSERT( matrix1.getColumns() == matrix2.getRows() &&
+              this->getRows() == matrix1.getRows() &&
+              this->getColumns() == matrix2.getColumns(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl
+                 << "Matrix1 columns: " << matrix1.getColumns() << std::endl
+                 << "Matrix1 rows: " << matrix1.getRows() << std::endl
+                 << "Matrix2 columns: " << matrix2.getColumns() << std::endl
+                 << "Matrix2 rows: " << matrix2.getRows() << std::endl );
+
+   if( std::is_same< Device, Devices::Host >::value )
+      for( IndexType i = 0; i < this->getRows(); i += tileDim )
+         for( IndexType j = 0; j < this->getColumns(); j += tileDim )
+         {
+            const IndexType tileRows = min( tileDim, this->getRows() - i );
+            const IndexType tileColumns = min( tileDim, this->getColumns() - j );
+            for( IndexType i1 = i; i1 < i + tileRows; i1++ )
+               for( IndexType j1 = j; j1 < j + tileColumns; j1++ )
+                  this->setElementFast( i1, j1, 0.0 );
+
+            for( IndexType k = 0; k < matrix1.getColumns(); k += tileDim )
+            {
+               const IndexType lastK = min( k + tileDim, matrix1.getColumns() );
+               for( IndexType i1 = 0; i1 < tileRows; i1++ )
+                  for( IndexType j1 = 0; j1 < tileColumns; j1++ )
+                     for( IndexType k1 = k; k1 < lastK; k1++ )
+                        this->addElementFast( i + i1, j + j1,
+                            matrix1.getElementFast( i + i1, k1 ) * matrix2.getElementFast( k1, j + j1 ) );
+            }
+         }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      const IndexType matrixProductCudaBlockSize( 256 );
+      const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
+      const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
+      const IndexType cudaBlockColumns( tileDim );
+      const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
+      cudaBlockSize.x = cudaBlockColumns;
+      cudaBlockSize.y = cudaBlockRows;
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
+
+      for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
+         for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
+         {
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
+            if( gridIdx_x == columnGrids - 1 )
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
+            if( gridIdx_y == rowGrids - 1 )
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
+            Dense* this_kernel = Cuda::passToDevice( *this );
+            Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 );
+            Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 );
+            DenseMatrixProductKernel< Real,
+                                               Index,
+                                               Matrix1,
+                                               Matrix2,
+                                               tileDim,
+                                               cudaBlockRows >
+                                           <<< cudaGridSize,
+                                               cudaBlockSize,
+                                               3*tileDim*tileDim >>>
+                                             ( this_kernel,
+                                               matrix1_kernel,
+                                               matrix2_kernel,
+                                               matrix1Multiplicator,
+                                               matrix2Multiplicator,
+                                               gridIdx_x,
+                                               gridIdx_y );
+            Cuda::freeFromDevice( this_kernel );
+            Cuda::freeFromDevice( matrix1_kernel );
+            Cuda::freeFromDevice( matrix2_kernel );
+         }*/
+#endif
+   }
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Matrix, int tileDim >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix,
+                                                              const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getColumns() == matrix.getRows() &&
+              this->getRows() == matrix.getColumns(),
+               std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                    << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix columns: " << matrix.getColumns() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      const IndexType& columns = matrix.getColumns();
+      for( IndexType i = 0; i < rows; i += tileDim )
+         for( IndexType j = 0; j < columns; j += tileDim )
+            for( IndexType k = i; k < i + tileDim && k < rows; k++ )
+               for( IndexType l = j; l < j + tileDim && l < columns; l++ )
+                  this->setElement( l, k, matrixMultiplicator * matrix. getElement( k, l ) );
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
+      const IndexType matrixProductCudaBlockSize( 256 );
+      const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim );
+      const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim );
+      const IndexType cudaBlockColumns( tileDim );
+      const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim );
+      cudaBlockSize.x = cudaBlockColumns;
+      cudaBlockSize.y = cudaBlockRows;
+      const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() );
+      const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() );
+      const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks();
+
+      Dense* this_device = Cuda::passToDevice( *this );
+      Matrix* matrix_device = Cuda::passToDevice( matrix );
+
+      for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ )
+         for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ )
+         {
+            cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize();
+            if( gridIdx_x == columnGrids - 1)
+               cudaGridSize.x = columnTiles % Cuda::getMaxGridSize();
+            if( gridIdx_y == rowGrids - 1 )
+               cudaGridSize.y = rowTiles % Cuda::getMaxGridSize();
+            if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) &&
+                ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) )
+            {
+               DenseTranspositionAlignedKernel< Real,
+                                                         Index,
+                                                         Matrix,
+                                                         tileDim,
+                                                         cudaBlockRows >
+                                                     <<< cudaGridSize,
+                                                         cudaBlockSize,
+                                                         sharedMemorySize  >>>
+                                                       ( this_device,
+                                                         matrix_device,
+                                                         matrixMultiplicator,
+                                                         gridIdx_x,
+                                                         gridIdx_y );
+            }
+            else
+            {
+               DenseTranspositionNonAlignedKernel< Real,
+                                                         Index,
+                                                         Matrix,
+                                                         tileDim,
+                                                         cudaBlockRows >
+                                                     <<< cudaGridSize,
+                                                         cudaBlockSize,
+                                                         sharedMemorySize  >>>
+                                                       ( this_device,
+                                                         matrix_device,
+                                                         matrixMultiplicator,
+                                                         gridIdx_x,
+                                                         gridIdx_y );
+            }
+            TNL_CHECK_CUDA_DEVICE;
+         }
+      Cuda::freeFromDevice( this_device );
+      Cuda::freeFromDevice( matrix_device );*/
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector1, typename Vector2 >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b,
+                                                        const IndexType row,
+                                                        Vector2& x,
+                                                        const RealType& omega ) const
+{
+   RealType sum( 0.0 ), diagonalValue;
+   for( IndexType i = 0; i < this->getColumns(); i++ )
+   {
+      if( i == row )
+         diagonalValue = this->getElement( row, row );
+      else
+         sum += this->getElement( row, i ) * x[ i ];
+   }
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+DenseMatrixView< Real, Device, Index, RowMajorOrder >&
+DenseMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const DenseMatrixView& matrix )
+{
+   MatrixView< Real, Device, Index >::operator=( matrix );
+   this->segments = matrix.segments;
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
+{
+   MatrixView< Real, Device, Index >::save( file );
+   this->segments.save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row,
+                                                              const IndexType column ) const
+{
+   return this->segments.getGlobalIndex( row, column );
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h
index 76b6ea8c1d5173ee8d0cd85421d919085fe590e5..05ee2839152940503f385d883575c9e5730041a2 100644
--- a/src/TNL/Matrices/DistributedMatrix.h
+++ b/src/TNL/Matrices/DistributedMatrix.h
@@ -14,7 +14,7 @@
 
 #include <type_traits>
 
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 #include <TNL/Communicators/MpiCommunicator.h>
 #include <TNL/Containers/Subrange.h>
 #include <TNL/Containers/DistributedVector.h>
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index b2abd13c537dc181de638caec4b6adf06755b2bf..a4711dce0e5b1a0a984966a7f997ae6549c0b3e4 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <utility>  // std::pair
 #include <limits>   // std::numeric_limits
+#include <TNL/Allocators/Host.h>
 #include <TNL/Matrices/Dense.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
@@ -124,8 +125,8 @@ public:
       preCommPatternEnds.setLike( commPatternEnds );
       for( int j = 0; j < nproc; j++ )
       for( int i = 0; i < nproc; i++ ) {
-         preCommPatternStarts.setElementFast( j, i, span_starts.getElement( i ) );
-         preCommPatternEnds.setElementFast( j, i, span_ends.getElement( i ) );
+         preCommPatternStarts.setElement( j, i, span_starts.getElement( i ) );
+         preCommPatternEnds.setElement( j, i, span_ends.getElement( i ) );
       }
 
       // assemble the commPattern* matrices
@@ -175,7 +176,7 @@ public:
              continue;
          if( commPatternStarts( rank, j ) < commPatternEnds( rank, j ) )
             commRequests.push_back( CommunicatorType::IRecv(
-                     &globalBuffer[ commPatternStarts( rank, j ) ],
+                     globalBuffer.getPointer( commPatternStarts( rank, j ) ),
                      commPatternEnds( rank, j ) - commPatternStarts( rank, j ),
                      j, 0, group ) );
       }
@@ -235,7 +236,7 @@ public:
 
 protected:
    // communication pattern
-   Matrices::Dense< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds;
+   Matrices::Dense< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds;
 
    // span of rows with only block-diagonal entries
    std::pair< IndexType, IndexType > localOnlySpan;
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h
similarity index 95%
rename from src/TNL/Matrices/AdEllpack.h
rename to src/TNL/Matrices/Legacy/AdEllpack.h
index f011e6c804429b4059b972b5249feaa1de5f8922..1135084ee9346375ad1f14bde3ba8453d0ac5868 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack.h
@@ -10,15 +10,15 @@
 
 /****
  * This class implements AdELL format from:
- * 
- * Maggioni M., Berger-Wolf T., 
+ *
+ * Maggioni M., Berger-Wolf T.,
  * AdELL: An Adaptive Warp-Balancing ELL Format for Efficient Sparse Matrix-Vector Multiplication on GPUs,
  * In proceedings of 42nd International Conference on Parallel Processing, 2013.
  */
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -33,7 +33,7 @@ struct warpInfo
     using RealType = typename MatrixType::RealType;
     using DeviceType = typename MatrixType::DeviceType;
     using IndexType = typename MatrixType::IndexType;
-    
+
     IndexType offset;
     IndexType rowOffset;
     IndexType localLoad;
@@ -47,7 +47,7 @@ template< typename MatrixType >
 class warpList
 {
 public:
-    
+
     using RealType = typename MatrixType::RealType;
     using DeviceType = typename MatrixType::DeviceType;
     using IndexType = typename MatrixType::IndexType;
@@ -74,7 +74,7 @@ public:
     { return this->tail; }
 
     ~warpList();
-    
+
     void printList()
     {
         if( this->getHead() == this->getTail() )
@@ -114,7 +114,7 @@ private:
    // friend class will be needed for templated assignment operators
    template< typename Real2, typename Device2, typename Index2 >
    friend class AdEllpack;
-   
+
 public:
 
     typedef Real RealType;
@@ -122,6 +122,7 @@ public:
     typedef Index IndexType;
     typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
     typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
 
     template< typename _Real = Real,
               typename _Device = Device,
@@ -132,6 +133,8 @@ public:
 
     void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+    void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
     IndexType getWarp( const IndexType row ) const;
 
     IndexType getInWarpOffset( const IndexType row,
@@ -143,7 +146,7 @@ public:
     void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
 
     void reset();
-    
+
     template< typename Real2, typename Device2, typename Index2 >
     bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -186,7 +189,7 @@ public:
               typename OutVector >
     void vectorProduct( const InVector& inVector,
                         OutVector& outVector ) const;
-    
+
     // copy assignment
     AdEllpack& operator=( const AdEllpack& matrix );
 
@@ -194,7 +197,7 @@ public:
     template< typename Real2, typename Device2, typename Index2,
              typename = typename Enabler< Device2 >::type >
     AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
-    
+
     void save( File& file ) const;
 
     void load( File& file );
@@ -242,29 +245,29 @@ public:
    void spmvCuda4( const InVector& inVector,
                    OutVector& outVector,
                    const int gridIdx ) const;
-   
+
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda8( const InVector& inVector,
                    OutVector& outVector,
                    const int gridIdx ) const;
-   
+
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda16( const InVector& inVector,
                     OutVector& outVector,
-                    const int gridIdx ) const;   
+                    const int gridIdx ) const;
 
    template< typename InVector,
           typename OutVector >
    __device__
    void spmvCuda32( const InVector& inVector,
                     OutVector& outVector,
-                    const int gridIdx ) const;   
-   
-   
+                    const int gridIdx ) const;
+
+
 #endif
 
 
@@ -293,4 +296,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/AdEllpack_impl.h>
+#include <TNL/Matrices/Legacy/AdEllpack_impl.h>
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/AdEllpack_impl.h
rename to src/TNL/Matrices/Legacy/AdEllpack_impl.h
index b7b97ff93550ef8c7289b749156e1fd5973e2f7d..242a3c81f810bd001c9beabe4c39e9048ff29e48 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/AdEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/TypeInfo.h>
@@ -220,6 +220,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
     }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void AdEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h
similarity index 95%
rename from src/TNL/Matrices/BiEllpack.h
rename to src/TNL/Matrices/Legacy/BiEllpack.h
index 3ec4b662fe19979939006a5cd011d037501fdb10..1a92581c71386e31d09b4bd811792fc6a5e6f493 100644
--- a/src/TNL/Matrices/BiEllpack.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack.h
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -32,7 +32,7 @@ template< typename Real, typename Device, typename Index >
 class BiEllpack : public Sparse< Real, Device, Index >
 {
 private:
-    
+
     // convenient template alias for controlling the selection of copy-assignment operator
     template< typename Device2 >
     using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
@@ -40,13 +40,14 @@ private:
     // friend class will be needed for templated assignment operators
     template< typename Real2, typename Device2, typename Index2 >
     friend class BiEllpack;
-    
+
 public:
 	typedef Real RealType;
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
 	typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
 
@@ -62,15 +63,17 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
 	IndexType getRowLength( const IndexType row ) const;
 
 	template< typename Real2,
 			  typename Device2,
 			  typename Index2 >
 	void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix );
-        
+
         void reset();
-        
+
         template< typename Real2, typename Device2, typename Index2 >
         bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -142,7 +145,7 @@ public:
 	IndexType getNumberOfGroups( const IndexType row ) const;
 
 	bool vectorProductTest() const;
-        
+
         // copy assignment
         BiEllpack& operator=( const BiEllpack& matrix );
 
@@ -160,7 +163,7 @@ public:
 	void load( const String& fileName );
 
 	void print( std::ostream& str ) const;
-        
+
         void printValues() const;
 
 	void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths );
@@ -217,5 +220,5 @@ private:
    } //namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/BiEllpack_impl.h>
+#include <TNL/Matrices/Legacy/BiEllpack_impl.h>
 
diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/BiEllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric.h
diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/BiEllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
similarity index 98%
rename from src/TNL/Matrices/BiEllpack_impl.h
rename to src/TNL/Matrices/Legacy/BiEllpack_impl.h
index c659b758e9cffe531a101baf8fe3cd812436fe2c..6db2ed6095926d2bcdb3950e996019756dacd422 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h
@@ -11,7 +11,7 @@
 #pragma once
 
 
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <cstdio>
@@ -78,9 +78,9 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     CompressedRowLengthsVector rowLengths;
     rowLengths.reset();
     rowLengths.setLike( constRowLengths );
-    
+
     rowLengths = constRowLengths;
-    
+
     if( this->getRows() % this->warpSize != 0 )
             this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) );
     else
@@ -88,7 +88,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     IndexType strips = this->virtualRows / this->warpSize;
     this->rowPermArray.setSize( this->rows );
     this->groupPointers.setSize( strips * ( this->logWarpSize + 1 ) + 1 );
-    
+
     this->groupPointers.setValue( 0 );
 
     DeviceDependentCode::performRowBubbleSort( *this, rowLengths );
@@ -103,6 +103,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths )
     return this->allocateMatrixElements( this->warpSize * this->groupPointers.getElement( strips * ( this->logWarpSize + 1 ) ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void BiEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -128,7 +138,7 @@ Index BiEllpack< Real, Device, Index >::getNumberOfGroups( const IndexType row )
 	IndexType strip = row / this->warpSize;
 	IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip;
 	IndexType numberOfGroups = this->logWarpSize + 1;
-	IndexType bisection = 1;        
+	IndexType bisection = 1;
 	for( IndexType i = 0; i < this->logWarpSize + 1; i++ )
 	{
 		if( rowStripPermutation < bisection )
@@ -148,7 +158,7 @@ template< typename Real,
 		  typename Index >
 Index BiEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const
 {
-	TNL_ASSERT( row >= 0 && row < this->getRows(), 
+	TNL_ASSERT( row >= 0 && row < this->getRows(),
                     std::cerr << "row = " << row << " this->getRows() = " << this->getRows() );
 
 	const IndexType strip = row / this->warpSize;
@@ -182,7 +192,7 @@ template< typename Real,
 			  typename Device2,
 			  typename Index2 >
 void BiEllpack< Real, Device, Index >::setLike( const BiEllpack< Real2, Device2, Index2 >& matrix )
-{        
+{
 	Sparse< Real, Device, Index >::setLike( matrix );
 	this->rowPermArray.setLike( matrix.rowPermArray );
 	this->groupPointers.setLike( matrix.groupPointers );
@@ -212,9 +222,9 @@ bool BiEllpack< Real, Device, Index >::operator == ( const BiEllpack< Real2, Dev
                     << " matrix.getRows() = " << matrix.getRows()
                     << " this->getColumns() = " << this->getColumns()
                     << " matrix.getColumns() = " << matrix.getColumns() );
-   
+
    TNL_ASSERT_TRUE( false, "operator == is not yet implemented for BiEllpack.");
-   
+
    // TODO: implement this
    return false;
 }
@@ -284,10 +294,10 @@ bool BiEllpack< Real, Device, Index >::addElement( const IndexType row,
                                                               const RealType& value,
                                                               const RealType& thisElementMultiplicator )
 {
-    const IndexType strip = row / this->warpSize;    
-    const IndexType groupBegin = strip * ( this->logWarpSize + 1 );    
-    const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize;    
-    IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm;    
+    const IndexType strip = row / this->warpSize;
+    const IndexType groupBegin = strip * ( this->logWarpSize + 1 );
+    const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize;
+    IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm;
     IndexType rowMultiplicator = 1;
     IndexType step = this->warpSize;
 
@@ -685,7 +695,7 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In
                   "unknown device" );
    static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
                   "unknown device" );
-   
+
    this->setLike( matrix );
    this->values = matrix.values;
    this->columnIndexes = matrix.columnIndexes;
@@ -777,14 +787,14 @@ void BiEllpack< Real, Device, Index >::printValues() const
 {
     for( Index i = 0; i < this->values.getSize(); i++ ) {
         if( this->columnIndexes.getElement( i ) != this->getColumns() )
-            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) 
+            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
              << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
     }
-    
+
     for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) {
         std::cout << "rowPermArray[ " << i << " ] = " << this->rowPermArray.getElement( i ) << std::endl;
     }
-    
+
     for( Index i = 0; i < this->groupPointers.getSize(); i++ ) {
         std::cout << "groupPointers[ " << i << " ] = " << this->groupPointers.getElement( i ) << std::endl;
     }
@@ -1146,7 +1156,7 @@ void BiEllpack< Real, Device, Index >::spmvCuda( const InVector& inVector,
     __syncthreads();
     if( warpStart + inWarpIdx >= this->getRows() )
         return;
-    
+
     outVector[ warpStart + inWarpIdx ] = results[ this->rowPermArray[ warpStart + inWarpIdx ] & ( cudaBlockSize - 1 ) ];
 }
 #endif
@@ -1321,7 +1331,7 @@ public:
                     const Index begin = matrix.groupPointers.getElement( groupBegin ) * matrix.warpSize + rowStripPerm * stripLength;
                     Index elementPtr = begin;
                     Index rowLength = 0;
-                    
+
                     for( Index group = 0; group < matrix.getNumberOfGroups( row ); group++ )
                     {
                         for( Index i = 0; i < matrix.getGroupLength( strip, group ); i++ )
diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/Legacy/CSR.h
similarity index 96%
rename from src/TNL/Matrices/CSR.h
rename to src/TNL/Matrices/Legacy/CSR.h
index 485176d1d849b4be2c296a0f131f5ee2299f89f2..a31f3ee76ed3fc18925212b93e3c4c14837d0a36 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/Legacy/CSR.h
@@ -8,9 +8,9 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#pragma once 
+#pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 #include <TNL/Devices/Cuda.h>
@@ -18,7 +18,7 @@
 
 namespace TNL {
 namespace Matrices {
-   
+
 #ifdef HAVE_UMFPACK
     template< typename Matrix, typename Preconditioner >
     class UmfpackWrapper;
@@ -48,6 +48,7 @@ public:
    using DeviceType = Device;
    using IndexType = Index;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
    typedef Sparse< Real, Device, Index > BaseType;
    using MatrixRow = typename BaseType::MatrixRow;
@@ -71,13 +72,15 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
    IndexType getRowLengthFast( const IndexType row ) const;
 
    IndexType getNonZeroRowLength( const IndexType row ) const;
-   
+
    __cuda_callable__
    IndexType getNonZeroRowLengthFast( const IndexType row ) const;
 
@@ -264,7 +267,7 @@ protected:
    int cudaWarpSize, hybridModeSplit;
 
    typedef CSRDeviceDependentCode< DeviceType > DeviceDependentCode;
-   
+
    friend class CSRDeviceDependentCode< DeviceType >;
    friend class CusparseCSR< RealType >;
 };
@@ -272,4 +275,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/CSR_impl.h>
+#include <TNL/Matrices/Legacy/CSR_impl.h>
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h
similarity index 98%
rename from src/TNL/Matrices/CSR_impl.h
rename to src/TNL/Matrices/Legacy/CSR_impl.h
index db31d6dcde6a07cd8b19e87f843f3b6e8b994c5c..5fec923f0333ff6b6df3591ac526366bb0bc27de 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/Legacy/CSR_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -20,7 +20,7 @@
 #endif
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 #ifdef HAVE_CUSPARSE
 template< typename Real, typename Index >
@@ -99,6 +99,16 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng
    this->columnIndexes.setValue( this->columns );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void CSR< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -131,7 +141,7 @@ template< typename Real,
           typename Index >
 __cuda_callable__
 Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const
-{  
+{
    ConstMatrixRow matrixRow = this->getRow( row );
    return matrixRow.getNonZeroElementsCount();
 }
@@ -884,10 +894,10 @@ template<>
 class tnlCusparseCSRWrapper< float, int >
 {
    public:
- 
+
       typedef float Real;
       typedef int Index;
- 
+
       static void vectorProduct( const Index rows,
                                  const Index columns,
                                  const Index nnz,
@@ -924,10 +934,10 @@ template<>
 class tnlCusparseCSRWrapper< double, int >
 {
    public:
- 
+
       typedef double Real;
       typedef int Index;
- 
+
       static void vectorProduct( const Index rows,
                                  const Index columns,
                                  const Index nnz,
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/Legacy/ChunkedEllpack.h
similarity index 99%
rename from src/TNL/Matrices/ChunkedEllpack.h
rename to src/TNL/Matrices/Legacy/ChunkedEllpack.h
index 9d422079608f52e7e89a9954496cbd22c0786c06..a0f55b3263d0911455318886cc680f5242de820b 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/Legacy/ChunkedEllpack.h
@@ -22,7 +22,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -352,5 +352,5 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/ChunkedEllpack_impl.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack_impl.h>
 
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
similarity index 99%
rename from src/TNL/Matrices/ChunkedEllpack_impl.h
rename to src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
index 3b1fd9c8f9fce07344115282ba98411d364d95e3..4061597524742923f03a4115a52a16a2f44bb0ae 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h
similarity index 95%
rename from src/TNL/Matrices/Ellpack.h
rename to src/TNL/Matrices/Legacy/Ellpack.h
index 6536f5f6ca6ffa7869851e2ad0883c51de83ed28..eea58b7571b1b6dd2416fbca0b3897f9782539d6 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Legacy/Ellpack.h
@@ -10,11 +10,11 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class EllpackDeviceDependentCode;
@@ -37,6 +37,7 @@ public:
    typedef Index IndexType;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef Sparse< Real, Device, Index > BaseType;
@@ -59,20 +60,22 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    void setConstantCompressedRowLengths( const IndexType& rowLengths );
 
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
    IndexType getRowLengthFast( const IndexType row ) const;
-   
+
    IndexType getNonZeroRowLength( const IndexType row ) const;
 
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Ellpack< Real2, Device2, Index2 >& matrix );
 
    void reset();
- 
+
    template< typename Real2, typename Device2, typename Index2 >
    bool operator == ( const Ellpack< Real2, Device2, Index2 >& matrix ) const;
 
@@ -175,9 +178,9 @@ public:
 								const Vector& old_x,
 								Vector& x,
 								const RealType& omega ) const;
-   
+
    // copy assignment
-   Ellpack& operator=( const Ellpack& matrix );   
+   Ellpack& operator=( const Ellpack& matrix );
 
    // cross-device copy assignment
    template< typename Real2, typename Device2, typename Index2,
@@ -207,4 +210,4 @@ protected:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Ellpack_impl.h>
+#include <TNL/Matrices/Legacy/Ellpack_impl.h>
diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/Legacy/EllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetric.h
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetricGraph.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h
diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetricGraph_impl.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h
diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/EllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h
similarity index 98%
rename from src/TNL/Matrices/Ellpack_impl.h
rename to src/TNL/Matrices/Legacy/Ellpack_impl.h
index 5ae12f408727bd1ae2f087f69fcb5bae2458fd55..04ca10385a67cf2782460abef6a4dd797ef95082 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -33,7 +33,7 @@ String Ellpack< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::Ellpack< " ) +
           String( TNL::getType< Real >() ) +
-          ", [any device], " + 
+          ", [any device], " +
           getType< Index >() +
           String( " >" );
 }
@@ -66,7 +66,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
            IndexType missingRows = this->rows - this->alignedRows;
 
            missingRows = roundToMultiple( missingRows, Cuda::getWarpSize() );
-           
+
            this->alignedRows +=  missingRows;
        }
    }
@@ -86,10 +86,20 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow
    TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
 
    this->rowLengths = this->maxRowLength = max( rowLengths );
-   
+
    allocateElements();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+void Ellpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index >
@@ -769,13 +779,13 @@ template< typename Real,
 void Ellpack< Real, Device, Index >::allocateElements()
 {
    IndexType numMtxElmnts = this->alignedRows * this->rowLengths;
-   
+
    if( this->alignedRows != 0 )
    {
-       TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, 
+       TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths,
                       "Ellpack cannot store this matrix. The number of matrix elements has overflown the value that IndexType is capable of storing" );
    }
-   
+
    Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths );
 }
 
diff --git a/src/TNL/Matrices/Legacy/Multidiagonal.h b/src/TNL/Matrices/Legacy/Multidiagonal.h
new file mode 100644
index 0000000000000000000000000000000000000000..d9f1379f793f660de1c138609fd085d64909198b
--- /dev/null
+++ b/src/TNL/Matrices/Legacy/Multidiagonal.h
@@ -0,0 +1,224 @@
+/***************************************************************************
+                          Multidiagonal.h  -  description
+                             -------------------
+    begin                : Oct 13, 2011
+    copyright            : (C) 2011 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/Legacy/MultidiagonalRow.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Device >
+class MultidiagonalDeviceDependentCode;
+
+template< typename Real, typename Device = Devices::Host, typename Index = int >
+class Multidiagonal : public Matrix< Real, Device, Index >
+{
+private:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Multidiagonal;
+
+public:
+   typedef Real RealType;
+   typedef Device DeviceType;
+   typedef Index IndexType;
+   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
+   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef Matrix< Real, Device, Index > BaseType;
+   typedef MultidiagonalRow< Real, Index > MatrixRow;
+
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Multidiagonal< _Real, _Device, _Index >;
+
+   Multidiagonal();
+
+   static String getSerializationType();
+
+   virtual String getSerializationTypeVirtual() const;
+
+   void setDimensions( const IndexType rows,
+                       const IndexType columns );
+
+   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+
+   IndexType getRowLength( const IndexType row ) const;
+
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
+   IndexType getMaxRowLength() const;
+
+   template< typename Vector >
+   void setDiagonals( const Vector& diagonals );
+
+   const Containers::Vector< Index, Device, Index >& getDiagonals() const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   void setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix );
+
+   IndexType getNumberOfMatrixElements() const;
+
+   IndexType getNumberOfNonzeroMatrixElements() const;
+
+   IndexType getMaxRowlength() const;
+
+   void reset();
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator == ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator != ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   void setValue( const RealType& v );
+
+   __cuda_callable__
+   bool setElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value );
+
+   bool setElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value );
+
+   __cuda_callable__
+   bool addElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value,
+                        const RealType& thisElementMultiplicator = 1.0 );
+
+   bool addElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value,
+                    const RealType& thisElementMultiplicator = 1.0 );
+
+
+   __cuda_callable__
+   bool setRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType numberOfElements );
+
+   bool setRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType numberOfElements );
+
+
+   __cuda_callable__
+   bool addRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType numberOfElements,
+                    const RealType& thisElementMultiplicator = 1.0 );
+
+   bool addRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType numberOfElements,
+                const RealType& thisElementMultiplicator = 1.0 );
+
+   __cuda_callable__
+   RealType getElementFast( const IndexType row,
+                            const IndexType column ) const;
+
+   RealType getElement( const IndexType row,
+                        const IndexType column ) const;
+
+   __cuda_callable__
+   void getRowFast( const IndexType row,
+                    IndexType* columns,
+                    RealType* values ) const;
+
+   /*void getRow( const IndexType row,
+                IndexType* columns,
+                RealType* values ) const;*/
+
+   __cuda_callable__
+   MatrixRow getRow( const IndexType rowIndex );
+
+   __cuda_callable__
+   const MatrixRow getRow( const IndexType rowIndex ) const;
+
+   template< typename Vector >
+   __cuda_callable__
+   typename Vector::RealType rowVectorProduct( const IndexType row,
+                                               const Vector& vector ) const;
+
+   template< typename InVector,
+             typename OutVector >
+   void vectorProduct( const InVector& inVector,
+                       OutVector& outVector ) const;
+
+   template< typename Real2, typename Index2 >
+   void addMatrix( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                   const RealType& matrixMultiplicator = 1.0,
+                   const RealType& thisMatrixMultiplicator = 1.0 );
+
+   template< typename Real2, typename Index2 >
+   void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                          const RealType& matrixMultiplicator = 1.0 );
+
+   template< typename Vector1, typename Vector2 >
+   bool performSORIteration( const Vector1& b,
+                             const IndexType row,
+                             Vector2& x,
+                             const RealType& omega = 1.0 ) const;
+
+   // copy assignment
+   Multidiagonal& operator=( const Multidiagonal& matrix );
+
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Multidiagonal& operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix );
+
+   void save( File& file ) const;
+
+   void load( File& file );
+
+   void save( const String& fileName ) const;
+
+   void load( const String& fileName );
+
+   void print( std::ostream& str ) const;
+
+protected:
+
+   bool getElementIndex( const IndexType row,
+                         const IndexType column,
+                         IndexType& index ) const;
+
+   __cuda_callable__
+   bool getElementIndexFast( const IndexType row,
+                             const IndexType column,
+                             IndexType& index ) const;
+
+   Containers::Vector< Real, Device, Index > values;
+
+   Containers::Vector< Index, Device, Index > diagonalsShift;
+
+   typedef MultidiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
+   friend class MultidiagonalDeviceDependentCode< DeviceType >;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/Legacy/Multidiagonal_impl.h>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
similarity index 96%
rename from src/TNL/Matrices/MultidiagonalMatrixSetter.h
rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
index c10d0cc57e7ebf4282158fa6720bf6388c7b9c05..f9e7ef135420f417ab34d6f182dd569d74f30768 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixSetter.h
+++ b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Meshes/Grid.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/Legacy/Multidiagonal.h>
 
 namespace TNL {
 namespace Matrices {   
@@ -85,4 +85,4 @@ class MultidiagonalMatrixSetter< Meshes::Grid< 3, MeshReal, Device, MeshIndex >
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/MultidiagonalMatrixSetter_impl.h>
+#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h
similarity index 100%
rename from src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h
rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h
diff --git a/src/TNL/Matrices/MultidiagonalRow.h b/src/TNL/Matrices/Legacy/MultidiagonalRow.h
similarity index 96%
rename from src/TNL/Matrices/MultidiagonalRow.h
rename to src/TNL/Matrices/Legacy/MultidiagonalRow.h
index 1d465d2296e9eb86dd3f376713d4e69fa172a09e..c41541eade10ca6b3fa2b98900dffadbc4e62fa9 100644
--- a/src/TNL/Matrices/MultidiagonalRow.h
+++ b/src/TNL/Matrices/Legacy/MultidiagonalRow.h
@@ -54,5 +54,5 @@ class MultidiagonalRow
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/MultidiagonalRow_impl.h>
+#include <TNL/Matrices/Legacy/MultidiagonalRow_impl.h>
 
diff --git a/src/TNL/Matrices/MultidiagonalRow_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h
similarity index 100%
rename from src/TNL/Matrices/MultidiagonalRow_impl.h
rename to src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h
similarity index 99%
rename from src/TNL/Matrices/Multidiagonal_impl.h
rename to src/TNL/Matrices/Legacy/Multidiagonal_impl.h
index 76f54f748c0744d810518cd9dde5872a894099ad..375e01c6d1cfe1439cf419d524bfba51d9b24b0b 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/Legacy/Multidiagonal.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h
similarity index 96%
rename from src/TNL/Matrices/SlicedEllpack.h
rename to src/TNL/Matrices/Legacy/SlicedEllpack.h
index 7176019d2979c57007062e10f02b263047e58157..63b4330871820eb534c99b765da000cee10ba263 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/Sparse.h>
+#include <TNL/Matrices/Legacy/Sparse.h>
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
@@ -66,6 +66,7 @@ public:
    typedef Index IndexType;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef Sparse< Real, Device, Index > BaseType;
@@ -89,6 +90,8 @@ public:
 
    void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
+   void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
    IndexType getRowLength( const IndexType row ) const;
 
    __cuda_callable__
@@ -235,4 +238,4 @@ public:
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/SlicedEllpack_impl.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack_impl.h>
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetric.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h
diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h
similarity index 100%
rename from src/TNL/Matrices/SlicedEllpackSymmetric_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
similarity index 98%
rename from src/TNL/Matrices/SlicedEllpack_impl.h
rename to src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
index 8c629b563cfe47f258f44f0705cf7b8b5b6d2435..8673a02c5085f7ac6b793fcf670ddf9c98c2fd87 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h
@@ -10,13 +10,13 @@
 
 #pragma once
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
@@ -83,6 +83,17 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C
    this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+void SlicedEllpack< Real, Device, Index, SliceSize >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h
similarity index 92%
rename from src/TNL/Matrices/Sparse.h
rename to src/TNL/Matrices/Legacy/Sparse.h
index 7dc3798d22fa421655944f6ad6669725fece5e4c..4de00cb2e8b8197bbe186f891a69fc67e18ba7f0 100644
--- a/src/TNL/Matrices/Sparse.h
+++ b/src/TNL/Matrices/Legacy/Sparse.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 
 namespace TNL {
 namespace Matrices {
@@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index >
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
+   typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVectorType ValuesVector;
    typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector;
    typedef Matrix< Real, Device, Index > BaseType;
    typedef SparseRow< RealType, IndexType > MatrixRow;
@@ -37,8 +37,6 @@ class Sparse : public Matrix< Real, Device, Index >
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Sparse< Real2, Device2, Index2 >& matrix );
 
-   IndexType getNumberOfMatrixElements() const;
-
    IndexType getNumberOfNonzeroMatrixElements() const;
 
    IndexType getMaxRowLength() const;
@@ -66,5 +64,5 @@ class Sparse : public Matrix< Real, Device, Index >
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Sparse_impl.h>
+#include <TNL/Matrices/Legacy/Sparse_impl.h>
 #include <TNL/Matrices/SparseOperations.h>
diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/Legacy/SparseRow.h
similarity index 97%
rename from src/TNL/Matrices/SparseRow.h
rename to src/TNL/Matrices/Legacy/SparseRow.h
index f66cd2ceaf1c6f0cd882bb962a78c6649816aa75..4787e638a43f7f2c8f658d75eddd9e5aef9d415f 100644
--- a/src/TNL/Matrices/SparseRow.h
+++ b/src/TNL/Matrices/Legacy/SparseRow.h
@@ -80,4 +80,4 @@ std::ostream& operator<<( std::ostream& str, const SparseRow< Real, Index >& row
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/SparseRow_impl.h>
+#include <TNL/Matrices/Legacy/SparseRow_impl.h>
diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/Legacy/SparseRow_impl.h
similarity index 99%
rename from src/TNL/Matrices/SparseRow_impl.h
rename to src/TNL/Matrices/Legacy/SparseRow_impl.h
index 60dfd5034ee36dd01cc8f6cf616fe86dd238c29b..84f8e210e28832e838fb73aae560c9fd60a25930 100644
--- a/src/TNL/Matrices/SparseRow_impl.h
+++ b/src/TNL/Matrices/Legacy/SparseRow_impl.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include <TNL/Matrices/SparseRow.h>
+#include <TNL/Matrices/Legacy/SparseRow.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 // Following includes are here to enable usage of std::vector and std::cout. To avoid having to include Device type (HOW would this be done anyway)
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h
similarity index 92%
rename from src/TNL/Matrices/Sparse_impl.h
rename to src/TNL/Matrices/Legacy/Sparse_impl.h
index d1643db19a48dbf078fe04389e9cb2d061b28a26..3e479441229197750dc384e210b0efefb044e1a3 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Legacy/Sparse_impl.h
@@ -33,16 +33,9 @@ template< typename Real,
 void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix )
 {
    Matrix< Real, Device, Index >::setLike( matrix );
-   this->allocateMatrixElements( matrix.getNumberOfMatrixElements() );
+   this->allocateMatrixElements( matrix.getAllocatedElementsCount() );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Sparse< Real, Device, Index >::getNumberOfMatrixElements() const
-{
-   return this->values.getSize();
-}
 
 template< typename Real,
           typename Device,
@@ -82,7 +75,6 @@ template< typename Real,
 void Sparse< Real, Device, Index >::reset()
 {
    Matrix< Real, Device, Index >::reset();
-   this->values.reset();
    this->columnIndexes.reset();
 }
 
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index eb29f62c7669af6c32a92414900891accae736f2..129a54cbe0cf47499fa5faa5dab45ad09b50834e 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -11,9 +11,11 @@
 #pragma once
 
 #include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
+#include <TNL/Matrices/MatrixView.h>
 
 namespace TNL {
 /**
@@ -23,39 +25,37 @@ namespace Matrices {
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
+          typename Index = int,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Matrix : public Object
 {
 public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-   typedef Containers::Vector< RealType, DeviceType, IndexType > ValuesVector;
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+   using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+   using ValuesVectorType = Containers::Vector< Real, Device, Index, RealAllocator >;
+   using RealAllocatorType = RealAllocator;
+   using ViewType = MatrixView< Real, Device, Index >;
+   using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
 
-   Matrix();
+   Matrix( const RealAllocatorType& allocator = RealAllocatorType() );
 
-   virtual void setDimensions( const IndexType rows,
-                                 const IndexType columns );
+   Matrix( const IndexType rows,
+           const IndexType columns,
+           const RealAllocatorType& allocator = RealAllocatorType() );
 
-   virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0;
+   void setDimensions( const IndexType rows,
+                       const IndexType columns );
 
-   virtual IndexType getRowLength( const IndexType row ) const = 0;
+   template< typename Matrix_ >
+   void setLike( const Matrix_& matrix );
 
-   // TODO: implementation is not parallel
-   // TODO: it would be nice if padding zeros could be stripped
-   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
+   IndexType getAllocatedElementsCount() const;
 
-   virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
-
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Matrix< Real2, Device2, Index2 >& matrix );
-
-   virtual IndexType getNumberOfMatrixElements() const = 0;
-
-   virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
+   IndexType getNumberOfNonzeroMatrixElements() const;
 
    void reset();
 
@@ -65,38 +65,9 @@ public:
    __cuda_callable__
    IndexType getColumns() const;
 
-   /****
-    * TODO: The fast variants of the following methods cannot be virtual.
-    * If they were, they could not be used in the CUDA kernels. If CUDA allows it
-    * in the future and it does not slow down, declare them as virtual here.
-    */
-
-   virtual bool setElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value ) = 0;
-
-   virtual bool addElement( const IndexType row,
-                            const IndexType column,
-                            const RealType& value,
-                            const RealType& thisElementMultiplicator = 1.0 ) = 0;
-
-   virtual bool setRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements ) = 0;
-
-   virtual bool addRow( const IndexType row,
-                        const IndexType* columns,
-                        const RealType* values,
-                        const IndexType numberOfElements,
-                        const RealType& thisElementMultiplicator = 1.0 ) = 0;
-
-   virtual Real getElement( const IndexType row,
-                            const IndexType column ) const = 0;
-   
-   const ValuesVector& getValues() const;
-   
-   ValuesVector& getValues();
+   const ValuesVectorType& getValues() const;
+
+   ValuesVectorType& getValues();
 
    // TODO: parallelize and optimize for sparse matrices
    template< typename Matrix >
@@ -113,28 +84,22 @@ public:
 
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    __cuda_callable__
    const IndexType& getNumberOfColors() const;
 
    // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
    void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
 
-   // TODO: what is this supposed to do?!?  There are redefinitions only in the
-   // EllpackSymmetricGraph and SlicedEllpackSymmetricGraph classes...
-   bool help( bool verbose = false ) { return true;};
-
-   // TODO: copy should be done in the operator= and it should work the other way too
-   void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix );
-
-   // TODO: missing implementation!
-   __cuda_callable__
-   Index getValuesSize() const;
-
    protected:
 
-   IndexType rows, columns, numberOfColors;
+   IndexType rows, columns;
 
-   ValuesVector values;
+   // TODO: remove
+   IndexType numberOfColors;
+
+   ValuesVectorType values;
 };
 
 template< typename Real, typename Device, typename Index >
@@ -144,14 +109,7 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index
    return str;
 }
 
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                              const InVector& inVector,
-                              OutVector& outVector );
-
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Matrix_impl.h>
+#include <TNL/Matrices/Matrix.hpp>
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce5f52274ec1134f30a52b64bf1572b7d757dc84
--- /dev/null
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -0,0 +1,254 @@
+/***************************************************************************
+                          Matrix_impl.h  -  description
+                             -------------------
+    begin                : Dec 18, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+#include <TNL/Cuda/SharedMemory.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
+Matrix( const RealAllocatorType& allocator )
+: rows( 0 ),
+  columns( 0 ),
+  values( allocator )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+Matrix< Real, Device, Index, RealAllocator >::
+Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator )
+: rows( rows_ ),
+  columns( columns_ ),
+  values( allocator )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows,
+                                                   const IndexType columns )
+{
+   TNL_ASSERT( rows > 0 && columns > 0,
+               std::cerr << " rows = " << rows << " columns = " << columns );
+   this->rows = rows;
+   this->columns = columns;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+   template< typename Matrix_ >
+void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix )
+{
+   setDimensions( matrix.getRows(), matrix.getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const
+{
+   return this->values.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+__cuda_callable__
+Index Matrix< Real, Device, Index, RealAllocator >::getRows() const
+{
+   return this->rows;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+__cuda_callable__
+Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const
+{
+   return this->columns;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+Matrix< Real, Device, Index, RealAllocator >::
+getValues() const
+{
+   return this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+Matrix< Real, Device, Index, RealAllocator >::
+getValues()
+{
+   return this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::reset()
+{
+   this->rows = 0;
+   this->columns = 0;
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+   template< typename MatrixT >
+bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const
+{
+   if( this->getRows() != matrix.getRows() ||
+       this->getColumns() != matrix.getColumns() )
+      return false;
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         if( this->getElement( row, column ) != matrix.getElement( row, column ) )
+            return false;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+   template< typename MatrixT >
+bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const
+{
+   return ! operator == ( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const
+{
+   Object::save( file );
+   file.save( &this->rows );
+   file.save( &this->columns );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::load( File& file )
+{
+   Object::load( file );
+   file.load( &this->rows );
+   file.load( &this->columns );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+__cuda_callable__
+const Index&
+Matrix< Real, Device, Index, RealAllocator >::
+getNumberOfColors() const
+{
+   return this->numberOfColors;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+void
+Matrix< Real, Device, Index, RealAllocator >::
+computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
+{
+    for( IndexType i = this->getRows() - 1; i >= 0; i-- )
+    {
+        // init color array
+        Containers::Vector< Index, Device, Index > usedColors;
+        usedColors.setSize( this->numberOfColors );
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            usedColors.setElement( j, 0 );
+
+        // find all colors used in given row
+        for( IndexType j = i + 1; j < this->getColumns(); j++ )
+             if( this->getElement( i, j ) != 0.0 )
+                 usedColors.setElement( colorsVector.getElement( j ), 1 );
+
+        // find unused color
+        bool found = false;
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            if( usedColors.getElement( j ) == 0 )
+            {
+                colorsVector.setElement( i, j );
+                found = true;
+                break;
+            }
+        if( !found )
+        {
+            colorsVector.setElement( i, this->numberOfColors );
+            this->numberOfColors++;
+        }
+    }
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..75cac40557058db2c41b06ca334787e8eb5f356f
--- /dev/null
+++ b/src/TNL/Matrices/MatrixInfo.h
@@ -0,0 +1,123 @@
+/***************************************************************************
+                          Matrix.h  -  description
+                             -------------------
+    begin                : Dec 18, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/String.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/DenseMatrixView.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Containers/Segments/CSRView.h>
+#include <TNL/Containers/Segments/EllpackView.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Matrix >
+struct MatrixInfo
+{};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > >
+{
+   static String getDensity() { return String( "dense" ); };
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+struct MatrixInfo< Dense< Real, Device, Index, RowMajorOrder, RealAllocator > >
+: public MatrixInfo< typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::ViewType >
+{
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename Device_, typename Index_ > class SegmentsView >
+struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return SegmentsView< Device, Index >::getSegmentsType(); };
+};
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > >
+: public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType >
+{
+};
+
+/////
+// Legacy matrices
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< BiEllpack< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "BiEllpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< CSR< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "CSR Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< ChunkedEllpack< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "ChunkedEllpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index >
+struct MatrixInfo< Ellpack< Real, Device, Index > >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "Ellpack Legacy"; };
+};
+
+template< typename Real, typename Device, typename Index, int SliceSize >
+struct MatrixInfo< SlicedEllpack< Real, Device, Index, SliceSize> >
+{
+   static String getDensity() { return String( "sparse" ); };
+
+   static String getFormat() { return "SlicedEllpack Legacy"; };
+};
+
+} //namespace Matrices
+} //namespace TNL
diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h
index aaf75a373fc8552db255f412868897497f7d3744..ae0606678f1b9167b10fd4b9e4868847c41c9b99 100644
--- a/src/TNL/Matrices/MatrixReader.h
+++ b/src/TNL/Matrices/MatrixReader.h
@@ -15,7 +15,7 @@
 #include <TNL/Containers/Vector.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Device >
 class MatrixReaderDeviceDependentCode
@@ -30,24 +30,24 @@ class MatrixReader
    typedef typename Matrix::DeviceType DeviceType;
    typedef typename Matrix::RealType RealType;
 
-   static bool readMtxFile( const String& fileName,
+   static void readMtxFile( const String& fileName,
                             Matrix& matrix,
                             bool verbose = false,
                             bool symReader = false );
 
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose = false,
                             bool symReader = false );
 
-   static bool readMtxFileHostMatrix( std::istream& file,
+   static void readMtxFileHostMatrix( std::istream& file,
                                       Matrix& matrix,
                                       typename Matrix::CompressedRowLengthsVector& rowLengths,
                                       bool verbose,
                                       bool symReader );
 
 
-   static bool verifyMtxFile( std::istream& file,
+   static void verifyMtxFile( std::istream& file,
                               const Matrix& matrix,
                               bool verbose = false );
 
@@ -61,13 +61,13 @@ class MatrixReader
    static bool checkMtxHeader( const String& header,
                                bool& symmetric );
 
-   static bool readMtxHeader( std::istream& file,
+   static void readMtxHeader( std::istream& file,
                               IndexType& rows,
                               IndexType& columns,
                               bool& symmetricMatrix,
                               bool verbose );
 
-   static bool computeCompressedRowLengthsFromMtxFile( std::istream& file,
+   static void computeCompressedRowLengthsFromMtxFile( std::istream& file,
                                              Containers::Vector< int, DeviceType, int >& rowLengths,
                                              const int columns,
                                              const int rows,
@@ -75,13 +75,13 @@ class MatrixReader
                                              bool verbose,
                                              bool symReader = false );
 
-   static bool readMatrixElementsFromMtxFile( std::istream& file,
+   static void readMatrixElementsFromMtxFile( std::istream& file,
                                               Matrix& matrix,
                                               bool symmetricMatrix,
                                               bool verbose,
                                               bool symReader );
 
-   static bool parseMtxLineWithElement( const String& line,
+   static void parseMtxLineWithElement( const String& line,
                                         IndexType& row,
                                         IndexType& column,
                                         RealType& value );
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index d00fdb9044f18fee40ea339cc29fe534874d6027..df2c05c63237c23e0fa26ba331ac8a3d2da03b8c 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -11,6 +11,7 @@
 #pragma once
 
 #include <iomanip>
+#include <sstream>
 #include <TNL/String.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Timer.h>
@@ -20,7 +21,7 @@ namespace TNL {
 namespace Matrices {
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFile( const String& fileName,
+void MatrixReader< Matrix >::readMtxFile( const String& fileName,
                                              Matrix& matrix,
                                              bool verbose,
                                              bool symReader )
@@ -28,24 +29,21 @@ bool MatrixReader< Matrix >::readMtxFile( const String& fileName,
    std::fstream file;
    file.open( fileName.getString(), std::ios::in );
    if( ! file )
-   {
-      std::cerr << "I am not able to open the file " << fileName << "." << std::endl;
-      return false;
-   }
-   return readMtxFile( file, matrix, verbose, symReader );
+      throw std::runtime_error( std::string( "I am not able to open the file " ) + fileName.getString() );
+   readMtxFile( file, matrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFile( std::istream& file,
+void MatrixReader< Matrix >::readMtxFile( std::istream& file,
                                              Matrix& matrix,
                                              bool verbose,
                                              bool symReader )
 {
-   return MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader );
+   MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
+void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
                                                        Matrix& matrix,
                                                        typename Matrix::CompressedRowLengthsVector& rowLengths,
                                                        bool verbose,
@@ -54,37 +52,29 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    IndexType rows, columns;
    bool symmetricMatrix( false );
 
-   if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) )
-      return false;
+   readMtxHeader( file, rows, columns, symmetricMatrix, verbose );
 
    if( symReader && !symmetricMatrix )
-   {
-      std::cout << "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." );
 
    matrix.setDimensions( rows, columns );
    rowLengths.setSize( rows );
 
-   if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
-      return false;
+   computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose );
 
    matrix.setCompressedRowLengths( rowLengths );
 
-   if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
-      return false;
-   return true;
+   readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader );
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
+void MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
                                                const Matrix& matrix,
                                                bool verbose )
 {
    bool symmetricMatrix( false );
    IndexType rows, columns;
-   if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) )
-      return false;
+   readMtxHeader( file, rows, columns, symmetricMatrix, false );
    file.clear();
    file.seekg( 0, std::ios::beg );
    String line;
@@ -102,16 +92,16 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-         return false;
+      parseMtxLineWithElement( line, row, column, value );
       if( value != matrix.getElement( row-1, column-1 ) ||
           ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) )
       {
-         std::cerr << "*** !!! VERIFICATION ERROR !!! *** " << std::endl
-              << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl
-              << "The matrix value is " << matrix.getElement( row-1, column-1 )
-              << " while the file value is " << value << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "*** !!! VERIFICATION ERROR !!! *** " << std::endl
+             << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl
+             << "The matrix value is " << matrix.getElement( row-1, column-1 )
+             << " while the file value is " << value << "." << std::endl;
+         throw std::runtime_error( str.str() );
       }
       processedElements++;
       if( symmetricMatrix && row != column )
@@ -126,7 +116,6 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file,
      std::cout << " Verifying the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements()
            << " -> " << timer.getRealTime()
            << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-   return true;
 }
 
 template< typename Matrix >
@@ -152,8 +141,7 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file,
       }
       IndexType currentRow( 1 ), currentColumn( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, currentRow, currentColumn, value ) )
-         return false;
+      parseMtxLineWithElement( line, currentRow, currentColumn, value );
       if( ( currentRow == row + 1 && currentColumn == column + 1 ) ||
           ( symmetricMatrix && currentRow == column + 1 && currentColumn == row + 1 ) )
          return true;
@@ -166,41 +154,27 @@ bool MatrixReader< Matrix >::checkMtxHeader( const String& header,
                                                 bool& symmetric )
 {
    std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty );
-   if( (int) parsedLine.size() < 5 )
-      return false;
-   if( parsedLine[ 0 ] != "%%MatrixMarket" )
+   if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" )
       return false;
    if( parsedLine[ 1 ] != "matrix" )
-   {
-      std::cerr << "Error: 'matrix' expected in the header line (" << header << ")." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() );
    if( parsedLine[ 2 ] != "coordinates" &&
        parsedLine[ 2 ] != "coordinate" )
-   {
-      std::cerr << "Error: Only 'coordinates' format is supported now, not " << parsedLine[ 2 ] << "." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Error: Only 'coordinates' format is supported now, not " ) + parsedLine[ 2 ].getString() );
    if( parsedLine[ 3 ] != "real" )
-   {
-      std::cerr << "Error: Only 'real' matrices are supported, not " << parsedLine[ 3 ] << "." << std::endl;
-      return false;
-   }
+      throw std::runtime_error( std::string( "Only 'real' matrices are supported, not " ) + parsedLine[ 3 ].getString() );
    if( parsedLine[ 4 ] != "general" )
    {
       if( parsedLine[ 4 ] == "symmetric" )
          symmetric = true;
       else
-      {
-         std::cerr << "Error: Only 'general' matrices are supported, not " << parsedLine[ 4 ] << "." << std::endl;
-         return false;
-      }
+         throw std::runtime_error(  std::string( "Only 'general' matrices are supported, not "  ) + parsedLine[ 4 ].getString() );
    }
    return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
+void MatrixReader< Matrix >::readMtxHeader( std::istream& file,
                                                IndexType& rows,
                                                IndexType& columns,
                                                bool& symmetric,
@@ -217,26 +191,17 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
       if( ! headerParsed )
       {
          headerParsed = checkMtxHeader( line, symmetric );
-         if( ! headerParsed )
-            return false;
          if( verbose && symmetric )
            std::cout << "The matrix is SYMMETRIC ... ";
          continue;
       }
       if( line[ 0 ] == '%' ) continue;
       if( ! headerParsed )
-      {
-         std::cerr << "Unknown format of the file. We expect line like this:" << std::endl;
-         std::cerr << "%%MatrixMarket matrix coordinate real general" << std::endl;
-         return false;
-      }
+         throw std::runtime_error( "Unknown format of the file. We expect line like this: %%MatrixMarket matrix coordinate real general" );
 
       parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty );
       if( (int) parsedLine.size() != 3 )
-      {
-         std::cerr << "Wrong number of parameters in the matrix header." << std::endl;
-         return false;
-      }
+         throw std::runtime_error( "Wrong number of parameters in the matrix header - should be 3." );
       rows = atoi( parsedLine[ 0 ].getString() );
       columns = atoi( parsedLine[ 1 ].getString() );
       if( verbose )
@@ -244,16 +209,13 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
               << " rows and " << columns << " columns. " << std::endl;
 
       if( rows <= 0 || columns <= 0 )
-      {
-         std::cerr << "Wrong parameters in the matrix header." << std::endl;
-         return false;
-      }
-      return true;
+         throw std::runtime_error( "Row or column index is negative."  );
+      break;
    }
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file,
+void MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file,
                                                               Containers::Vector< int, DeviceType, int >& rowLengths,
                                                               const int columns,
                                                               const int rows,
@@ -279,13 +241,13 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-         return false;
+      parseMtxLineWithElement( line, row, column, value );
       numberOfElements++;
       if( column > columns || row > rows )
       {
-         std::cerr << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << ".";
+         throw std::runtime_error( str.str() );
       }
       if( verbose )
          std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands      \r" << std::flush;
@@ -298,23 +260,23 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
 
       if( rowLengths[ row - 1 ] > columns )
       {
-         std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "." << std::endl;
-         return false;
+         std::stringstream str;
+         str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << ".";
+         throw std::runtime_error( str.str() );
       }
       if( symmetricMatrix && row != column && symReader )
       {
          rowLengths[ column - 1 ]++;
          if( rowLengths[ column - 1 ] > columns )
          {
-            std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ." << std::endl;
-            return false;
+            std::stringstream str;
+            str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " .";
+            throw std::runtime_error( str.str() );
          }
          continue;
       }
       else if( symmetricMatrix && row != column && !symReader )
-      {
           rowLengths[ column - 1 ]++;
-      }
    }
    file.clear();
    long int fileSize = file.tellg();
@@ -323,11 +285,10 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea
      std::cout << " Counting the matrix elements ... " << numberOfElements / 1000
            << " thousands  -> " << timer.getRealTime()
            << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-   return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
+void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
                                                                Matrix& matrix,
                                                                bool symmetricMatrix,
                                                                bool verbose,
@@ -351,8 +312,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
       }
       IndexType row( 1 ), column( 1 );
       RealType value;
-      if( ! parseMtxLineWithElement( line, row, column, value ) )
-         return false;
+      parseMtxLineWithElement( line, row, column, value );
 
       if( !symReader ||
           ( symReader && row >= column ) )
@@ -362,9 +322,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
 
       processedElements++;
       if( symmetricMatrix && row != column && symReader )
-      {
           continue;
-      }
       else if( symmetricMatrix && row != column && !symReader )
       {
           matrix.setElement( column - 1, row - 1, value );
@@ -376,15 +334,13 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
    long int fileSize = file.tellg();
    timer.stop();
    if( verbose )
-     std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements()
+     std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getAllocatedElementsCount()
               << " -> " << timer.getRealTime()
               << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
-
-   return true;
 }
 
 template< typename Matrix >
-bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
+void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
                                                          IndexType& row,
                                                          IndexType& column,
                                                          RealType& value )
@@ -392,13 +348,13 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
    std::vector< String > parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty );
    if( (int) parsedLine.size() != 3 )
    {
-      std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl;
-      return false;
+      std::stringstream str;
+      str << "Wrong number of parameters in the matrix row at line:" << line;
+      throw std::runtime_error( str.str() );
    }
    row = atoi( parsedLine[ 0 ].getString() );
    column = atoi( parsedLine[ 1 ].getString() );
    value = ( RealType ) atof( parsedLine[ 2 ].getString() );
-   return true;
 }
 
 template<>
@@ -407,13 +363,13 @@ class MatrixReaderDeviceDependentCode< Devices::Host >
    public:
 
    template< typename Matrix >
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose,
                             bool symReader )
    {
       typename Matrix::CompressedRowLengthsVector rowLengths;
-      return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
+      MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
    }
 };
 
@@ -423,7 +379,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
    public:
 
    template< typename Matrix >
-   static bool readMtxFile( std::istream& file,
+   static void readMtxFile( std::istream& file,
                             Matrix& matrix,
                             bool verbose,
                             bool symReader )
@@ -433,10 +389,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
 
       HostMatrixType hostMatrix;
       CompressedRowLengthsVector rowLengths;
-      return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
-
-      matrix = hostMatrix;
-      return true;
+      MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
    }
 };
 
diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d4cfe7ba4d592ca117005127660f800df287906
--- /dev/null
+++ b/src/TNL/Matrices/MatrixType.h
@@ -0,0 +1,63 @@
+/***************************************************************************
+                          MatrixType.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+template< bool Symmetric,
+          bool Binary >
+struct MatrixType
+{
+   static constexpr bool isSymmetric() { return Symmetric; }
+
+   static constexpr bool isBinary() { return Binary; }
+
+};
+
+struct GeneralMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return false; }
+};
+
+struct SymmetricMatrix
+{
+   static constexpr bool isSymmetric() { return true; }
+
+   static constexpr bool isBinary() { return false; }
+};
+
+struct BinaryMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
+};
+
+struct BinarySymmetricMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
+};
+
+struct SymmetricBinaryMatrix
+{
+   static constexpr bool isSymmetric() { return false; }
+
+   static constexpr bool isBinary() { return true; }
+};
+
+
+   } //namespace Matrices
+} //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..89551018186e279ced32005971ccfe44d7bda918
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.h
@@ -0,0 +1,117 @@
+/***************************************************************************
+                          MatrixView.h  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class MatrixView : public Object
+{
+public:
+   using RealType = Real;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+   using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+   using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >;
+   using ViewType = MatrixView< typename std::remove_const< Real >::type, Device, Index >;
+   using ConstViewType = MatrixView< typename std::add_const< Real >::type, Device, Index >;
+
+   __cuda_callable__
+   MatrixView();
+
+   __cuda_callable__
+   MatrixView( const IndexType rows,
+               const IndexType columns,
+               const ValuesView& values );
+
+   __cuda_callable__
+   MatrixView( const MatrixView& view ) = default;
+
+   IndexType getAllocatedElementsCount() const;
+
+   virtual IndexType getNumberOfNonzeroMatrixElements() const;
+
+   __cuda_callable__
+   IndexType getRows() const;
+
+   __cuda_callable__
+   IndexType getColumns() const;
+
+   __cuda_callable__
+   const ValuesView& getValues() const;
+
+   __cuda_callable__
+   ValuesView& getValues();
+
+   /**
+    * \brief Shallow copy of the matrix view.
+    *
+    * @param view
+    * @return
+    */
+   __cuda_callable__
+   MatrixView& operator=( const MatrixView& view );
+
+   // TODO: parallelize and optimize for sparse matrices
+   template< typename Matrix >
+   bool operator == ( const Matrix& matrix ) const;
+
+   template< typename Matrix >
+   bool operator != ( const Matrix& matrix ) const;
+
+   virtual void save( File& file ) const;
+
+   virtual void load( File& file );
+
+   virtual void print( std::ostream& str ) const;
+
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
+   __cuda_callable__
+   const IndexType& getNumberOfColors() const;
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   [[deprecated]]
+   void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
+
+   protected:
+
+   IndexType rows, columns;
+
+   ValuesView values;
+};
+
+template< typename Real, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m )
+{
+   m.print( str );
+   return str;
+}
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MatrixView.hpp>
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfac8f3afc671db39382fff6cc1916e73f3fc4b0
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -0,0 +1,225 @@
+/***************************************************************************
+                          MatrixView.hpp  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+#include <TNL/Cuda/SharedMemory.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView()
+: rows( 0 ),
+  columns( 0 )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView( const IndexType rows_,
+            const IndexType columns_,
+            const ValuesView& values_ )
+ : rows( rows_ ), columns( columns_ ), values( values_ )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index
+MatrixView< Real, Device, Index >::
+getAllocatedElementsCount() const
+{
+   return this->values.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index
+MatrixView< Real, Device, Index >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getRows() const
+{
+   return this->rows;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getColumns() const
+{
+   return this->columns;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const typename MatrixView< Real, Device, Index >::ValuesView&
+MatrixView< Real, Device, Index >::
+getValues() const
+{
+   return this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+typename MatrixView< Real, Device, Index >::ValuesView&
+MatrixView< Real, Device, Index >::
+getValues()
+{
+   return this->values;
+}
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >&
+MatrixView< Real, Device, Index >::
+operator=( const MatrixView& view )
+{
+   rows = view.rows;
+   columns = view.columns;
+   values.bind( view.values );
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
+{
+   if( this->getRows() != matrix.getRows() ||
+       this->getColumns() != matrix.getColumns() )
+      return false;
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         if( this->getElement( row, column ) != matrix.getElement( row, column ) )
+            return false;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const
+{
+   return ! operator == ( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::save( File& file ) const
+{
+   Object::save( file );
+   file.save( &this->rows );
+   file.save( &this->columns );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::load( File& file )
+{
+   Object::load( file );
+   file.load( &this->rows );
+   file.load( &this->columns );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::print( std::ostream& str ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const Index&
+MatrixView< Real, Device, Index >::
+getNumberOfColors() const
+{
+   return this->numberOfColors;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void
+MatrixView< Real, Device, Index >::
+computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
+{
+    for( IndexType i = this->getRows() - 1; i >= 0; i-- )
+    {
+        // init color array
+        Containers::Vector< Index, Device, Index > usedColors;
+        usedColors.setSize( this->numberOfColors );
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            usedColors.setElement( j, 0 );
+
+        // find all colors used in given row
+        for( IndexType j = i + 1; j < this->getColumns(); j++ )
+             if( this->getElement( i, j ) != 0.0 )
+                 usedColors.setElement( colorsVector.getElement( j ), 1 );
+
+        // find unused color
+        bool found = false;
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            if( usedColors.getElement( j ) == 0 )
+            {
+                colorsVector.setElement( i, j );
+                found = true;
+                break;
+            }
+        if( !found )
+        {
+            colorsVector.setElement( i, this->numberOfColors );
+            this->numberOfColors++;
+        }
+    }
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
deleted file mode 100644
index 3371ee4ec453d0c2d6af294ed6ab2df9d3623b32..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/Matrix_impl.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/***************************************************************************
-                          Matrix_impl.h  -  description
-                             -------------------
-    begin                : Dec 18, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Matrices/Matrix.h>
-#include <TNL/Assert.h>
-#include <TNL/Cuda/LaunchHelpers.h>
-#include <TNL/Cuda/MemoryHelpers.h>
-#include <TNL/Cuda/SharedMemory.h>
-
-namespace TNL {
-namespace Matrices {
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Matrix< Real, Device, Index >::Matrix()
-: rows( 0 ),
-  columns( 0 )
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                   const IndexType columns )
-{
-   TNL_ASSERT( rows > 0 && columns > 0,
-               std::cerr << " rows = " << rows << " columns = " << columns );
-   this->rows = rows;
-   this->columns = columns;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
-{
-   rowLengths.setSize( this->getRows() );
-   getCompressedRowLengths( rowLengths.getView() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
-{
-   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      rowLengths.setElement( row, this->getRowLength( row ) );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2,
-             typename Device2,
-             typename Index2 >
-void Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix )
-{
-   setDimensions( matrix.getRows(), matrix.getColumns() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
-{
-    IndexType nonZeroElements( 0 );
-    for( IndexType i = 0; this->values.getSize(); i++ )
-        if( this->values.getElement( i ) != 0.0 )
-            nonZeroElements++;
-      
-    return nonZeroElements;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index Matrix< Real, Device, Index >::getRows() const
-{
-   return this->rows;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index Matrix< Real, Device, Index >::getColumns() const
-{
-   return this->columns;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-const typename Matrix< Real, Device, Index >::ValuesVector&
-Matrix< Real, Device, Index >::
-getValues() const
-{
-   return this->values;
-}
-   
-template< typename Real,
-          typename Device,
-          typename Index >
-typename Matrix< Real, Device, Index >::ValuesVector& 
-Matrix< Real, Device, Index >::
-getValues()
-{
-   return this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::reset()
-{
-   this->rows = 0;
-   this->columns = 0;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename MatrixT >
-bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
-{
-   if( this->getRows() != matrix.getRows() ||
-       this->getColumns() != matrix.getColumns() )
-      return false;
-   for( IndexType row = 0; row < this->getRows(); row++ )
-      for( IndexType column = 0; column < this->getColumns(); column++ )
-         if( this->getElement( row, column ) != matrix.getElement( row, column ) )
-            return false;
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename MatrixT >
-bool Matrix< Real, Device, Index >::operator != ( const MatrixT& matrix ) const
-{
-   return ! operator == ( matrix );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::save( File& file ) const
-{
-   Object::save( file );
-   file.save( &this->rows );
-   file.save( &this->columns );
-   file << this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::load( File& file )
-{
-   Object::load( file );
-   file.load( &this->rows );
-   file.load( &this->columns );
-   file >> this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Matrix< Real, Device, Index >::print( std::ostream& str ) const
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-const Index&
-Matrix< Real, Device, Index >::
-getNumberOfColors() const
-{
-   return this->numberOfColors;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void 
-Matrix< Real, Device, Index >::
-computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
-{
-    for( IndexType i = this->getRows() - 1; i >= 0; i-- )
-    {
-        // init color array
-        Containers::Vector< Index, Device, Index > usedColors;
-        usedColors.setSize( this->numberOfColors );
-        for( IndexType j = 0; j < this->numberOfColors; j++ )
-            usedColors.setElement( j, 0 );
-
-        // find all colors used in given row
-        for( IndexType j = i + 1; j < this->getColumns(); j++ )
-             if( this->getElement( i, j ) != 0.0 )
-                 usedColors.setElement( colorsVector.getElement( j ), 1 );
-
-        // find unused color
-        bool found = false;
-        for( IndexType j = 0; j < this->numberOfColors; j++ )
-            if( usedColors.getElement( j ) == 0 )
-            {
-                colorsVector.setElement( i, j );
-                found = true;
-                break;
-            }
-        if( !found )
-        {
-            colorsVector.setElement( i, this->numberOfColors );
-            this->numberOfColors++;
-        }
-    }
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void
-Matrix< Real, Device, Index >::
-copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix )
-{
-    this->numberOfColors = matrix.getNumberOfColors();
-    this->columns = matrix.getColumns();
-    this->rows = matrix.getRows();
-
-    this->values.setSize( matrix.getValuesSize() );
-}
-
-#ifdef HAVE_CUDA
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
-                                                  const InVector* inVector,
-                                                  OutVector* outVector,
-                                                  int gridIdx )
-{
-   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx < matrix->getRows() )
-      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
-}
-#endif
-
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-{
-#ifdef HAVE_CUDA
-   typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
-   {
-      if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
-                                     ( kernel_this,
-                                       kernel_inVector,
-                                       kernel_outVector,
-                                       gridIdx );
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   Cuda::freeFromDevice( kernel_this );
-   Cuda::freeFromDevice( kernel_inVector );
-   Cuda::freeFromDevice( kernel_outVector );
-   TNL_CHECK_CUDA_DEVICE;
-#endif
-}
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 1ee6a25e9af4fbf8d8f28461e6658305e2b0151f..3b92d1db1c4343bb41832f6158377b1ac6356c9b 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -12,213 +12,216 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/MultidiagonalRow.h>
+#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
+#include <TNL/Matrices/MultidiagonalMatrixView.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
+{
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using IndexAllocatorType = IndexAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
+      using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
 
-template< typename Device >
-class MultidiagonalDeviceDependentCode;
+      using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
+      using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType;
 
-template< typename Real, typename Device = Devices::Host, typename Index = int >
-class Multidiagonal : public Matrix< Real, Device, Index >
-{
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Multidiagonal;
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Multidiagonal< _Real, _Device, _Index >;
 
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef MultidiagonalRow< Real, Index > MatrixRow;
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Multidiagonal< _Real, _Device, _Index >;
+      Multidiagonal();
 
-   Multidiagonal();
+      Multidiagonal( const IndexType rows,
+                     const IndexType columns );
 
-   static String getSerializationType();
+      template< typename Vector >
+      Multidiagonal( const IndexType rows,
+                     const IndexType columns,
+                     const Vector& diagonalsShifts );
 
-   virtual String getSerializationTypeVirtual() const;
+      ViewType getView() const; // TODO: remove const
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      //ConstViewType getConstView() const;
 
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      static String getSerializationType();
 
-   IndexType getRowLength( const IndexType row ) const;
+      virtual String getSerializationTypeVirtual() const;
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
+      template< typename Vector >
+      void setDimensions( const IndexType rows,
+                          const IndexType columns,
+                          const Vector&  diagonalsShifts );
 
-   IndexType getMaxRowLength() const;
+      //template< typename Vector >
+      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
 
-   template< typename Vector >
-   void setDiagonals( const Vector& diagonals );
+      const IndexType& getDiagonalsCount() const;
 
-   const Containers::Vector< Index, Device, Index >& getDiagonals() const;
+      const DiagonalsShiftsType& getDiagonalsShifts() const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix );
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   IndexType getNumberOfMatrixElements() const;
+      IndexType getNonemptyRowsCount() const;
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   IndexType getMaxRowlength() const;
+      IndexType getMaxRowLength() const;
 
-   void reset();
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator == ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator != ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const;
+      void reset();
 
-   void setValue( const RealType& v );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      void setValue( const RealType& v );
 
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType numberOfElements );
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType numberOfElements );
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType numberOfElements,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType numberOfElements,
-                const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   __cuda_callable__
-   RealType getElementFast( const IndexType row,
-                            const IndexType column ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   RealType getElement( const IndexType row,
-                        const IndexType column ) const;
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   /*void getRow( const IndexType row,
-                IndexType* columns,
-                RealType* values ) const;*/
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Real2, typename Index2 >
+      void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename InVector,
-             typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   template< typename Real2, typename Index2 >
-   void addMatrix( const Multidiagonal< Real2, Device, Index2 >& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      // copy assignment
+      Multidiagonal& operator=( const Multidiagonal& matrix );
 
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      // cross-device copy assignment
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                bool RowMajorOrder_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix );
 
-   template< typename Vector1, typename Vector2 >
-   bool performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      void save( File& file ) const;
 
-   // copy assignment
-   Multidiagonal& operator=( const Multidiagonal& matrix );
+      void load( File& file );
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Multidiagonal& operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix );
+      void save( const String& fileName ) const;
 
-   void save( File& file ) const;
+      void load( const String& fileName );
 
-   void load( File& file );
+      void print( std::ostream& str ) const;
 
-   void save( const String& fileName ) const;
+      const IndexerType& getIndexer() const;
 
-   void load( const String& fileName );
+      IndexerType& getIndexer();
 
-   void print( std::ostream& str ) const;
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
 
-protected:
+   protected:
 
-   bool getElementIndex( const IndexType row,
-                         const IndexType column,
-                         IndexType& index ) const;
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
 
-   __cuda_callable__
-   bool getElementIndexFast( const IndexType row,
-                             const IndexType column,
-                             IndexType& index ) const;
+      DiagonalsShiftsType diagonalsShifts;
 
-   Containers::Vector< Real, Device, Index > values;
+      HostDiagonalsShiftsType hostDiagonalsShifts;
 
-   Containers::Vector< Index, Device, Index > diagonalsShift;
+      IndexerType indexer;
 
-   typedef MultidiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class MultidiagonalDeviceDependentCode< DeviceType >;
+      ViewType view;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Multidiagonal_impl.h>
+#include <TNL/Matrices/Multidiagonal.hpp>
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8eb6675134f20ecd2f74f3a984d59c94bcd8bbf
--- /dev/null
+++ b/src/TNL/Matrices/Multidiagonal.hpp
@@ -0,0 +1,950 @@
+/***************************************************************************
+                          Multidiagonal.hpp  -  description
+                             -------------------
+    begin                : Oct 13, 2011
+    copyright            : (C) 2011 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <sstream>
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Device >
+class MultidiagonalDeviceDependentCode;
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+Multidiagonal()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+Multidiagonal( const IndexType rows,
+               const IndexType columns,
+               const Vector& diagonalsShifts )
+{
+   TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." );
+   this->setDimensions( rows, columns, diagonalsShifts );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getView() const -> ViewType
+{
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< Multidiagonal* >( this )->values.getView(),
+                    const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(),
+                    const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(),
+                    indexer );
+}
+
+/*template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getSerializationType()
+{
+   return String( "Matrices::Multidiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator], [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns,
+               const Vector& diagonalsShifts )
+{
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->diagonalsShifts = diagonalsShifts;
+   this->hostDiagonalsShifts = diagonalsShifts;
+   const IndexType minShift = min( diagonalsShifts );
+   IndexType nonemptyRows = min( rows, columns );
+   if( rows > columns && minShift < 0 )
+      nonemptyRows = min( rows, nonemptyRows - minShift );
+   this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows );
+   this->values.setSize( this->indexer.getStorageSize() );
+   this->values = 0.0;
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+ //  template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
+{
+   if( max( rowLengths ) > 3 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( rowLengths.getElement( 0 ) > 2 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   if( this->getRows() > this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() == this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() < this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+const Index&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getDiagonalsCount() const
+{
+   return this->view.getDiagonalsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getDiagonalsShifts() const -> const DiagonalsShiftsType&
+{
+   return this->diagonalsShifts;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   return this->view.getCompressedRowLengths( rowLengths );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getNonemptyRowsCount() const
+{
+   return this->indexer.getNonemptyRowsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRowLength( const IndexType row ) const
+{
+   return this->view.getRowLength( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getMaxRowLength() const
+{
+   return this->view.getMaxRowLength();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
+{
+   this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getNumberOfNonzeroMatrixElements() const
+{
+   return this->view.getNumberOfNonzeroMatrixElements();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+reset()
+{
+   Matrix< Real, Device, Index >::reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setValue( const RealType& v )
+{
+   this->view.setValue( v );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   this->view.setElement( row, column, value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   this->view.addElement( row, column, value, thisElementMultiplicator );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Real
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   return this->view.getElement( row, column );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+  template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+   return this->view.rowVectorProduct();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename InVector,
+             typename OutVector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   this->view.vectorProduct( inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real2, typename Index2 >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
+                                                                    const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      Multidiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Multidiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_, typename IndexAllocator_ >
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >&
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix )
+{
+   using RHSMatrix = Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
+
+   this->setLike( matrix );
+   if( RowMajorOrder == RowMajorOrder_ )
+      this->values = matrix.getValues();
+   else
+   {
+      if( std::is_same< Device, Device_ >::value )
+      {
+         const auto matrix_view = matrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+      else
+      {
+         const IndexType maxRowLength = this->diagonalsShifts.getSize();
+         const IndexType bufferRowsCount( 128 );
+         const size_t bufferSize = bufferRowsCount * maxRowLength;
+         Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+         Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
+         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+         Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
+         auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+         auto thisValuesBuffer_view = thisValuesBuffer.getView();
+
+         IndexType baseRow( 0 );
+         const IndexType rowsCount = this->getRows();
+         while( baseRow < rowsCount )
+         {
+            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+            ////
+            // Copy matrix elements into buffer
+            auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+                  const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+                  matrixValuesBuffer_view[ bufferIdx ] = value;
+            };
+            matrix.forRows( baseRow, lastRow, f1 );
+
+            ////
+            // Copy the source matrix buffer to this matrix buffer
+            thisValuesBuffer_view = matrixValuesBuffer_view;
+
+            ////
+            // Copy matrix elements from the buffer to the matrix
+            auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value, bool& compute  ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+                  value = thisValuesBuffer_view[ bufferIdx ];
+            };
+            this->forRows( baseRow, lastRow, f2 );
+            baseRow += bufferRowsCount;
+         }
+      }
+   }
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+   file << diagonalsShifts;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+   file >> this->diagonalsShifts;
+   this->hostDiagonalsShifts = this->diagonalsShifts;
+   const IndexType minShift = min( diagonalsShifts );
+   IndexType nonemptyRows = min( this->getRows(), this->getColumns() );
+   if( this->getRows() > this->getColumns() && minShift < 0 )
+      nonemptyRows = min( this->getRows(), nonemptyRows - minShift );
+   this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows );
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+print( std::ostream& str ) const
+{
+   this->view.print( str );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+getPaddingIndex() const
+{
+   return this->view.getPaddingIndex();
+}
+
+/*
+template<>
+class MultidiagonalDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return 2*row + column;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType  >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ 1 ];
+         Index i = 3 * row;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i - 1 ] +
+                   vector[ row ] * values[ i ];
+         return vector[ row - 1 ] * values[ i - 1 ] +
+                vector[ row ] * values[ i ] +
+                vector[ row + 1 ] * values[ i + 1 ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class MultidiagonalDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return ( column - row + 1 )*rows + row - 1;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ rows - 1 ];
+         Index i = row - 1;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i ] +
+                   vector[ row ] * values[ i + rows ];
+         return vector[ row - 1 ] * values[ i ] +
+                vector[ row ] * values[ i + rows ] +
+                vector[ row + 1 ] * values[ i + 2*rows ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+ */
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
new file mode 100644
index 0000000000000000000000000000000000000000..0825d6fb365ebd6552ee033d41a1fe208219a14e
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          MultidiagonalMatrixRowView.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView,
+          typename Indexer,
+          typename DiagonalsShiftsView_ >
+class MultidiagonalMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using IndexType = typename ValuesView::IndexType;
+      using ValuesViewType = ValuesView;
+      using IndexerType = Indexer;
+      using DiagonalsShiftsView = DiagonalsShiftsView_;
+
+      __cuda_callable__
+      MultidiagonalMatrixRowView( const IndexType rowIdx,
+                                  const DiagonalsShiftsView& diagonalsShifts,
+                                  const ValuesViewType& values,
+                                  const IndexerType& indexer);
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const RealType& value );
+   protected:
+
+      IndexType rowIdx;
+
+      DiagonalsShiftsView diagonalsShifts;
+
+      ValuesViewType values;
+
+      Indexer indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MultidiagonalMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..855b8463aa13eb5d21bee65923704d2be1d897ba
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -0,0 +1,76 @@
+/***************************************************************************
+                          MultidiagonalMatrixRowView.hpp  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView( const IndexType rowIdx,
+                            const DiagonalsShiftsView& diagonalsShifts,
+                            const ValuesViewType& values,
+                            const IndexerType& indexer )
+: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer )
+{
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+getSize() const -> IndexType
+{
+   return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx );
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType
+{
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" );
+   return rowIdx + diagonalsShifts[ localIdx ];
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+__cuda_callable__
+void 
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+setElement( const IndexType localIdx,
+            const RealType& value )
+{
+   this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..97ff94f85c6b81cb06b6832c1836ace71a7fdbbd
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -0,0 +1,187 @@
+/***************************************************************************
+                          MultidiagonalMatrixView.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/MatrixView.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
+{
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >;
+      using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
+      using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >;
+      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value >
+      using Self = MultidiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >;
+
+      MultidiagonalMatrixView();
+
+      MultidiagonalMatrixView( const ValuesViewType& values,
+                               const DiagonalsShiftsView& diagonalsShifts,
+                               const HostDiagonalsShiftsView& hostDiagonalsShifts,
+                               const IndexerType& indexer );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      __cuda_callable__
+      const IndexType& getDiagonalsCount() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      IndexType getNonemptyRowsCount() const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      void setValue( const RealType& v );
+
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view );
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      void save( File& file ) const;
+
+      void save( const String& fileName ) const;
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      const IndexerType& getIndexer() const;
+
+      __cuda_callable__
+      IndexerType& getIndexer();
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
+
+      DiagonalsShiftsView diagonalsShifts;
+
+      HostDiagonalsShiftsView hostDiagonalsShifts;
+
+      IndexerType indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MultidiagonalMatrixView.hpp>
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ecfe1c1d8310577812aa2e46c9cc1fc6d8e0035f
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -0,0 +1,737 @@
+/***************************************************************************
+                          MultidiagonalMatrixView.hpp  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/MultidiagonalMatrixView.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+MultidiagonalMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+MultidiagonalMatrixView( const ValuesViewType& values,
+                         const DiagonalsShiftsView& diagonalsShifts,
+                         const HostDiagonalsShiftsView& hostDiagonalsShifts,
+                         const IndexerType& indexer )
+: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ),
+  diagonalsShifts( diagonalsShifts ),
+  hostDiagonalsShifts( hostDiagonalsShifts ),
+  indexer( indexer )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
+{
+   return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(),
+                    indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(),
+                         this->diagonalsShifts.getConstView(),
+                         this->hostDiagonalsShifts.getConstView(),
+                         indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
+{
+   return String( "Matrices::Multidiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+const Index&
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getDiagonalsCount() const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNonemptyRowsCount() const
+{
+   return this->indexer.getNonemptyRowsCount();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRowLength( const IndexType row ) const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getMaxRowLength() const
+{
+   return this->diagonalsShifts.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setValue( const RealType& v )
+{
+   // we dont do this->values = v here because it would set even elements 'outside' the matrix
+   // method getNumberOfNonzeroElements would not well
+   const RealType newValue = v;
+   auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable {
+      value = newValue;
+   };
+   this->forAllRows( f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+      {
+         this->values.setElement( this->getElementIndex( row, i ), value );
+         return;
+      }
+   if( value != 0.0 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+      {
+         const Index idx = this->getElementIndex( row, i );
+         this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value );
+         return;
+      }
+   if( value != 0.0 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Real
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
+   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
+      if( row + hostDiagonalsShifts[ i ] == column )
+         return this->values.getElement( this->getElementIndex( row, i ) );
+   return 0.0;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const MultidiagonalMatrixView& view )
+{
+   MatrixView< Real, Device, Index >::operator=( view );
+   this->diagonalsShifts.bind( view.diagonalsShifts );
+   this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts );
+   this->indexer = view.indexer;
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
+{
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
+   const auto values_view = this->values.getConstView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      Real_ sum( zero );
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) );
+      }
+      keep( rowIdx, sum );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
+   const auto indexer = this->indexer;
+   bool compute( true );
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+  template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
+   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const IndexType columns = this->getColumns();
+   const auto indexer = this->indexer;
+   bool compute( true );
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < columns )
+            function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType 
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename InVector,
+             typename OutVector >
+void 
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
+
+   /*if( RowMajorOrder == RowMajorOrder_ )
+   {
+      if( thisMatrixMultiplicator == 1.0 )
+         this->values += matrixMultiplicator * matrix.getValues();
+      else
+         this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues();
+   }
+   else
+   {
+      const auto matrix_view = matrix;
+      const auto matrixMult = matrixMultiplicator;
+      const auto thisMult = thisMatrixMultiplicator;
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      if( thisMult == 0.0 )
+         this->forAllRows( add0 );
+      else if( thisMult == 1.0 )
+         this->forAllRows( add1 );
+      else
+         this->forAllRows( addGen );
+   }*/
+}
+
+#ifdef HAVE_CUDA
+/*template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}*/
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real2, typename Index2 >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      /*Multidiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Multidiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;*/
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
+{
+   MatrixView< Real, Device, Index >::save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
+{
+   for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ )
+   {
+      str <<"Row: " << rowIdx << " -> ";
+      for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ];
+         if( columnIdx >= 0 && columnIdx < this->columns )
+         {
+            auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) );
+            if( v )
+               str << " Col:" << columnIdx << "->" << v  << "\t";
+         }
+      }
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElementIndex( const IndexType row, const IndexType localIdx ) const
+{
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..032767518cc275e707961af48e474f4210256ef1
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -0,0 +1,245 @@
+/***************************************************************************
+                          SparseMatrix.h -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrixRowView.h>
+#include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Matrices/Dense.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device = Devices::Host,
+          typename Index = int,
+          typename MatrixType = GeneralMatrix,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Containers::Segments::CSR,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
+{
+   public:
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+      static constexpr bool isBinary() { return MatrixType::isBinary(); };
+
+      static_assert(
+            ! isSymmetric() ||
+            ! std::is_same< Device, Devices::Cuda >::value ||
+            ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+            "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+
+      using RealType = Real;
+      template< typename Device_, typename Index_, typename IndexAllocator_ >
+      using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
+      using SegmentsType = Segments< Device, Index, IndexAllocator >;
+      template< typename Device_, typename Index_ >
+      using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >;
+      using SegmentsViewType = typename SegmentsType::ViewType;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using IndexAllocatorType = IndexAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
+      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
+      using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
+      using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      SparseMatrix( const SparseMatrix& m );
+
+      SparseMatrix( const SparseMatrix&& m );
+
+      SparseMatrix( const IndexType rows,
+                    const IndexType columns,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
+                    const IndexType columns,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      SparseMatrix( const IndexType rows,
+                    const IndexType columns,
+                    const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
+                    const RealAllocatorType& realAllocator = RealAllocatorType(),
+                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+
+      ViewType getView() const; // TODO: remove const
+
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename RowsCapacitiesVector >
+      void setCompressedRowLengths( const RowsCapacitiesVector& rowCapacities );
+
+      // TODO: Remove this when possible
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) {
+         this->setCompressedRowLengths( rowLengths );
+      };
+
+      void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data );
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      virtual IndexType getRowLength( const IndexType row ) const { return 0;};
+
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator );
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      /***
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& outVectorMultiplicator = 0.0 ) const;
+
+      /*template< typename Real2, typename Index2 >
+      void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+       */
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector1, typename Vector2 >
+      bool performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       * @param matrix
+       * @return
+       */
+      SparseMatrix& operator=( const SparseMatrix& matrix );
+
+      /**
+       * \brief Assignment of dense matrix
+       */
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
+      SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix );
+
+
+      /**
+       * \brief Assignment of any other matrix type.
+       * @param matrix
+       * @return
+       */
+      template< typename RHSMatrix >
+      SparseMatrix& operator=( const RHSMatrix& matrix );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+// TODO: restore it and also in Matrix
+//   protected:
+
+      ColumnsIndexesVectorType columnIndexes;
+
+      SegmentsType segments;
+
+      IndexAllocator indexAllocator;
+
+      //RealAllocator realAllocator;
+
+      ViewType view;
+};
+
+}  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrix.hpp>
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c1f3b1ce41d27adf2a804e7171ec21f89ba7313
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -0,0 +1,955 @@
+/***************************************************************************
+                          SparseMatrix.hpp -  description
+                             -------------------
+    begin                : Nov 29, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <functional>
+#include <sstream>
+#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+   : BaseType( realAllocator ), columnIndexes( indexAllocator )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const SparseMatrix& m )
+   : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const SparseMatrix&& m )
+   : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const IndexType rows,
+              const IndexType columns,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
+              const IndexType columns,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator )
+{
+   this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+SparseMatrix( const IndexType rows,
+              const IndexType columns,
+              const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
+{
+   this->setElements( data );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getView() const -> ViewType
+{
+   return ViewType( this->getRows(),
+                    this->getColumns(),
+                    const_cast< SparseMatrix* >( this )->getValues().getView(),  // TODO: remove const_cast
+                    const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
+                    const_cast< SparseMatrix* >( this )->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->columnIndexes.getConstView(),
+                         this->segments.getConstView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getSerializationType()
+{
+   return String( "Matrices::SparseMatrix< " ) +
+             TNL::getSerializationType< RealType >() + ", " +
+             TNL::getSerializationType< SegmentsType >() + ", [any_device], " +
+             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+String
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename RowsCapacitiesVector >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities )
+{
+   TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." );
+   using RowsCapacitiesVectorDevice = typename RowsCapacitiesVector::DeviceType;
+   if( std::is_same< DeviceType, RowsCapacitiesVectorDevice >::value )
+      this->segments.setSegmentsSizes( rowsCapacities );
+   else
+   {
+      RowsCapacitiesType thisRowsCapacities;
+      thisRowsCapacities = rowsCapacities;
+      this->segments.setSegmentsSizes( thisRowsCapacities );
+   }
+   if( ! isBinary() )
+   {
+      this->values.setSize( this->segments.getStorageSize() );
+      this->values = ( RealType ) 0;
+   }
+   this->columnIndexes.setSize( this->segments.getStorageSize() );
+   this->columnIndexes = this->getPaddingIndex();
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data )
+{
+   const auto& rows = this->getRows();
+   const auto& columns = this->getColumns();
+   Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 );
+   for( const auto& i : data )
+   {
+      if( std::get< 0 >( i ) >= rows )
+      {
+         std::stringstream s;
+         s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
+      rowCapacities[ std::get< 0 >( i ) ]++;
+   }
+   SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns );
+   hostMatrix.setCompressedRowLengths( rowCapacities );
+   for( const auto& i : data )
+   {
+      if( std::get< 1 >( i ) >= columns )
+      {
+         std::stringstream s;
+         s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list";
+         throw std::logic_error( s.str() );
+      }
+      hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) );
+   }
+   ( *this ) = hostMatrix;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   this->view.getCompressedRowLengths( rowLengths );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Matrix_ >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setLike( const Matrix_& matrix )
+{
+   BaseType::setLike( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+Index
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getNumberOfNonzeroMatrixElements() const
+{
+   return this->view.getNumberOfNonzeroMatrixElements();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+reset()
+{
+   BaseType::reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__ auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__ auto
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
+{
+   this->view.setElement( row, column, value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   this->view.addElement( row, column, value, thisElementMultiplicator );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+Real
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getElement( const IndexType row,
+            const IndexType column ) const
+{
+   return this->view.getElement( row, column );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+rowVectorProduct( const IndexType row,
+                  const Vector& vector ) const
+{
+   this->view.rowVectorProduct( row, vector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename InVector,
+       typename OutVector >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType& matrixMultiplicator,
+               const RealType& outVectorMultiplicator ) const
+{
+   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator );
+   /*TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+      const IndexType column = columnIndexesView[ globalIdx ];
+      compute = ( column != paddingIndex );
+      if( ! compute )
+         return 0.0;
+      return valuesView[ globalIdx ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );*/
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   /*const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
+      IndexType columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+         return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   this->view.forRows( first, last, function );
+   /*const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+    */
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   this->view.forRows( first, last, function );
+   /*auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );*/
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+/*template< typename Real,
+          template< typename, typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+
+}
+
+template< typename Real,
+          template< typename, typename, typename > class Segments,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, typename Index2 >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Vector1, typename Vector2 >
+bool
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   return false;
+}
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const SparseMatrix& matrix )
+{
+   Matrix< Real, Device, Index >::operator=( matrix );
+   this->columnIndexes = matrix.columnIndexes;
+   this->segments = matrix.segments;
+   this->view = this->getView();
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
+{
+   using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >;
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setLike( matrix );
+   this->setCompressedRowLengths( rowLengths );
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
+
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
+   columns_view = paddingIndex;
+
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
+         if( value != 0.0 )
+         {
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
+            columns_view[ thisGlobalIdx ] = columnIdx;
+            if( ! isBinary() )
+               values_view[ thisGlobalIdx ] = value;
+         }
+      };
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = matrix.getColumns();
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = paddingIndex;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+            matrixValuesBuffer_view[ bufferIdx ] = value;
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements.
+         const IndexType matrix_columns = this->getColumns();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute  ) mutable {
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] );
+            while( inValue == 0.0 && column < matrix_columns )
+            {
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + column++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = column;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = column - 1;
+               value = inValue;
+            }
+         };
+         this->forRows( baseRow, lastRow, f2 );
+         baseRow += bufferRowsCount;
+      }
+      //std::cerr << "This matrix = " << std::endl << *this << std::endl;
+   }
+   this->view = this->getView();
+   return *this;
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename RHSMatrix >
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const RHSMatrix& matrix )
+{
+   using RHSIndexType = typename RHSMatrix::IndexType;
+   using RHSRealType = typename RHSMatrix::RealType;
+   using RHSDeviceType = typename RHSMatrix::DeviceType;
+   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
+
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   this->setCompressedRowLengths( rowLengths );
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
+
+   // TODO: use getConstView when it works
+   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
+   columns_view = paddingIndex;
+
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
+   {
+      const auto segments_view = this->segments.getView();
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+         IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
+         if( value != 0.0 && columnIndex != paddingIndex )
+         {
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
+            columns_view[ thisGlobalIdx ] = columnIndex;
+            if( ! isBinary() )
+               values_view[ thisGlobalIdx ] = value;
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
+         }
+      };
+      matrix.forAllRows( f );
+   }
+   else
+   {
+      const IndexType maxRowLength = max( rowLengths );
+      const IndexType bufferRowsCount( 128 );
+      const size_t bufferSize = bufferRowsCount * maxRowLength;
+      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
+      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
+      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
+      Containers::Vector< IndexType, DeviceType, IndexType > thisRowLengths;
+      thisRowLengths = rowLengths;
+      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
+      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
+      auto thisValuesBuffer_view = thisValuesBuffer.getView();
+      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
+      matrixValuesBuffer_view = 0.0;
+
+      IndexType baseRow( 0 );
+      const IndexType rowsCount = this->getRows();
+      while( baseRow < rowsCount )
+      {
+         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+         thisColumnsBuffer = paddingIndex;
+         matrixColumnsBuffer_view = paddingIndex;
+
+         ////
+         // Copy matrix elements into buffer
+         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+            if( columnIndex != paddingIndex )
+            {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
+               matrixValuesBuffer_view[ bufferIdx ] = value;
+               //std::cerr << " <<<<< rowIdx = " << rowIdx << " localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl;
+            }
+         };
+         matrix.forRows( baseRow, lastRow, f1 );
+
+         ////
+         // Copy the source matrix buffer to this matrix buffer
+         thisValuesBuffer_view = matrixValuesBuffer_view;
+         thisColumnsBuffer_view = matrixColumnsBuffer_view;
+
+         ////
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements
+         const IndexType matrix_columns = this->getColumns();
+         const auto thisRowLengths_view = thisRowLengths.getConstView();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
+            RealType inValue( 0.0 );
+            size_t bufferIdx;
+            IndexType bufferLocalIdx( rowLocalIndexes_view[ rowIdx ] );
+            while( inValue == 0.0 && localIdx < thisRowLengths_view[ rowIdx ] )
+            {
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + bufferLocalIdx++;
+               TNL_ASSERT_LT( bufferIdx, bufferSize, "" );
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx
+            //          << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl;
+            rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
+               value = inValue;
+            }
+         };
+         this->forRows( baseRow, lastRow, f2 );
+         baseRow += bufferRowsCount;
+      }
+   }
+   this->view = this->getView();
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+save( File& file ) const
+{
+   this->view.save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+load( File& file )
+{
+   Matrix< RealType, DeviceType, IndexType >::load( file );
+   file >> this->columnIndexes;
+   this->segments.load( file );
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+print( std::ostream& str ) const
+{
+   this->view.print( str );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+__cuda_callable__
+Index
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+   } //namespace Matrices
+} // namespace  TNL
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
new file mode 100644
index 0000000000000000000000000000000000000000..8906ab5ae9fd1457ee6690597898a001bdab7c18
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -0,0 +1,67 @@
+ /***************************************************************************
+                          SparseMatrixRowView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+class SparseMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using SegmentViewType = SegmentView;
+      using IndexType = typename SegmentViewType::IndexType;
+      using ValuesViewType = ValuesView;
+      using ColumnsIndexesViewType = ColumnsIndexesView;
+
+      static constexpr bool isBinary() { return isBinary_; };
+
+      __cuda_callable__
+      SparseMatrixRowView( const SegmentViewType& segmentView,
+                           const ValuesViewType& values,
+                           const ColumnsIndexesViewType& columnIndexes );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType& getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      IndexType& getColumnIndex( const IndexType localIdx );
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const IndexType column,
+                       const RealType& value );
+   protected:
+
+      SegmentViewType segmentView;
+
+      ValuesViewType values;
+
+      ColumnsIndexesViewType columnIndexes;
+};
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..67d0845d4af23dd57065b516b212a278fbd0fd5d
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -0,0 +1,111 @@
+/***************************************************************************
+                          SparseMatrixRowView.hpp -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/SparseMatrixRowView.h>
+
+namespace TNL {
+   namespace Matrices {
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+SparseMatrixRowView( const SegmentViewType& segmentView,
+                     const ValuesViewType& values,
+                     const ColumnsIndexesViewType& columnIndexes )
+ : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes )
+{
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getSize() const -> IndexType
+{
+   return segmentView.getSize();
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getColumnIndex( const IndexType localIdx ) -> IndexType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." );
+   return values[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." );
+   return values[ segmentView.getGlobalIndex( localIdx ) ];
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ void
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+setElement( const IndexType localIdx,
+            const IndexType column,
+            const RealType& value )
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   const IndexType globalIdx = segmentView.getGlobalIndex( localIdx );
+   columnIndexes[ globalIdx ] = column;
+   if( ! isBinary() )
+      values[ globalIdx ] = value;
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..4fa65b70a09e7834aacac1ac80d74ee08c9e4ece
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -0,0 +1,168 @@
+/***************************************************************************
+                          SparseMatrixView.h -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrixRowView.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device = Devices::Host,
+          typename Index = int,
+          typename MatrixType = GeneralMatrix,
+          template< typename Device_, typename Index_ > class SegmentsView = Containers::Segments::CSRView >
+class SparseMatrixView : public MatrixView< Real, Device, Index >
+{
+   public:
+      static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+      static constexpr bool isBinary() { return MatrixType::isBinary(); };
+
+      using RealType = Real;
+      template< typename Device_, typename Index_ >
+      using SegmentsViewTemplate = SegmentsView< Device_, Index_ >;
+      using SegmentsViewType = SegmentsView< Device, Index >;
+      using SegmentViewType = typename SegmentsViewType::SegmentViewType;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
+      using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      __cuda_callable__
+      SparseMatrixView();
+
+      __cuda_callable__
+      SparseMatrixView( const IndexType rows,
+                        const IndexType columns,
+                        const ValuesViewType& values,
+                        const ColumnsIndexesViewType& columnIndexes,
+                        const SegmentsViewType& segments );
+
+      __cuda_callable__
+      SparseMatrixView( const SparseMatrixView& m ) = default;
+
+      //__cuda_callable__
+      //SparseMatrixView( const SparseMatrixView&& m ) = default;
+
+      __cuda_callable__
+      ViewType getView();
+
+      __cuda_callable__
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      void reset();
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      void addElement( IndexType row,
+                       IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      RealType getElement( IndexType row,
+                           IndexType column ) const;
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      /***
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0 ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector1, typename Vector2 >
+      bool performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      SparseMatrixView& operator=( const SparseMatrixView& matrix );
+
+      void save( File& file ) const;
+
+      void save( const String& fileName ) const;
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      ColumnsIndexesViewType columnIndexes;
+
+      SegmentsViewType segments;
+};
+
+}  // namespace Conatiners
+} // namespace TNL
+
+#include <TNL/Matrices/SparseMatrixView.hpp>
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2bae61f985c312279d60de9f809b71ea3a19629f
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -0,0 +1,699 @@
+/***************************************************************************
+                          SparseMatrixView.hpp -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <functional>
+#include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Algorithms/AtomicOperations.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+SparseMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+SparseMatrixView( const IndexType rows,
+                  const IndexType columns,
+                  const ValuesViewType& values,
+                  const ColumnsIndexesViewType& columnIndexes,
+                  const SegmentsViewType& segments )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getView() -> ViewType
+{
+   return ViewType( this->getRows(),
+                    this->getColumns(),
+                    this->getValues().getView(),
+                    this->columnIndexes.getView(),
+                    this->segments.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->getRows(),
+                         this->getColumns(),
+                         this->getValues().getConstView(),
+                         this->getColumnsIndexes().getConstView(),
+                         this->segments.getConstView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+String
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getSerializationType()
+{
+   return String( "Matrices::SparseMatrix< " ) +
+             TNL::getSerializationType< RealType >() + ", " +
+             TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " +
+             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+String
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Vector >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRowLength( const IndexType row ) const
+{
+   return 0;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   if( ! isSymmetric() )
+   {
+      auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+         return ( columns_view[ i ] != paddingIndex );
+      };
+      return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
+   }
+   else
+   {
+      const auto rows = this->getRows();
+      const auto columns = this->getColumns();
+      Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 );
+      auto row_sums_view = row_sums.getView();
+      const auto columnIndexesView = this->columnIndexes.getConstView();
+      auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+         const IndexType column = columnIndexesView[ globalIdx ];
+         compute = ( column != paddingIndex );
+         if( ! compute )
+            return 0.0;
+         return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements
+      };
+      auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) {
+         sum += value;
+      };
+      auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable {
+         row_sums_view[ row ] = value;
+      };
+      this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( IndexType ) 0 );
+      return sum( row_sums );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__ auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__ auto
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." );
+   return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+setElement( const IndexType row,
+            const IndexType column,
+            const RealType& value )
+{
+   this->addElement( row, column, value, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addElement( IndexType row,
+            IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
+   TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
+
+   if( isSymmetric() && row < column )
+   {
+      swap( row, column );
+      TNL_ASSERT_LT( row, this->getRows(), "Column index is out of the symmetric part of the matrix after transposition." );
+      TNL_ASSERT_LT( column,this->getColumns(), "Row index is out of the symmetric part of the matrix after transposition." );
+   }
+
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   IndexType col( this->getPaddingIndex() );
+   IndexType i;
+   IndexType globalIdx;
+   for( i = 0; i < rowSize; i++ )
+   {
+      globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+      {
+         if( ! isBinary() )
+            this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value );
+         return;
+      }
+      if( col == this->getPaddingIndex() || col > column )
+         break;
+   }
+   if( i == rowSize )
+   {
+      std::stringstream msg;
+      msg << "The capacity of the sparse matrix row number "  << row << " was exceeded.";
+      throw std::logic_error( msg.str() );
+   }
+   if( col == this->getPaddingIndex() )
+   {
+      this->columnIndexes.setElement( globalIdx, column );
+      if( ! isBinary() )
+         this->values.setElement( globalIdx, value );
+      return;
+   }
+   else
+   {
+      IndexType j = rowSize - 1;
+      while( j > i )
+      {
+         const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j );
+         const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 );
+         TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" );
+         TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" );
+         this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) );
+         if( ! isBinary() )
+            this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) );
+         j--;
+      }
+
+      this->columnIndexes.setElement( globalIdx, column );
+      if( ! isBinary() )
+         this->values.setElement( globalIdx, value );
+      return;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+Real
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getElement( IndexType row,
+            IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
+   TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
+   TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." );
+   TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." );
+
+   if( isSymmetric() && row < column )
+   {
+      swap( row, column );
+      if( row >= this->getRows() || column >= this->getColumns() )
+         return 0.0;
+   }
+
+   const IndexType rowSize = this->segments.getSegmentSize( row );
+   for( IndexType i = 0; i < rowSize; i++ )
+   {
+      const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+      TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" );
+      const IndexType col = this->columnIndexes.getElement( globalIdx );
+      if( col == column )
+      {
+         if( isBinary() )
+            return 1;
+         else
+            return this->values.getElement( globalIdx );
+      }
+   }
+   return 0.0;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+rowVectorProduct( const IndexType row,
+                  const Vector& vector ) const
+{
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+template< typename InVector,
+       typename OutVector >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   if( isSymmetric() )
+      outVector *= outVectorMultiplicator;
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> RealType {
+      const IndexType column = columnIndexesView[ globalIdx ];
+      compute = ( column != paddingIndex );
+      if( ! compute )
+         return 0.0;
+      if( isSymmetric() && column < row )
+      {
+         if( isBinary() )
+            Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
+         else
+            Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
+      }
+      if( isBinary() )
+         return inVectorView[ column ];
+      return valuesView[ globalIdx ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      if( isSymmetric() )
+         outVectorView[ row ] += matrixMultiplicator * value;
+      else
+      {
+         if( outVectorMultiplicator == 0.0 )
+            outVectorView[ row ] = matrixMultiplicator * value;
+         else
+            outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
+      }
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+
+   /*const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   const auto columnIndexesView = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
+      const IndexType column = columnIndexesView[ offset ];
+      compute = ( column != paddingIndex );
+      if( ! compute )
+         return 0.0;
+      return valuesView[ offset ] * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );
+   */
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
+      IndexType columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+      {
+         if( isBinary() )
+            return fetch( rowIdx, columnIdx, globalIdx, 1 );
+         else
+            return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
+      }
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto columns_view = this->columnIndexes.getConstView();
+   const auto values_view = this->values.getConstView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool {
+      if( isBinary() )
+         function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute );
+      else
+         function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
+      if( isBinary() )
+      {
+         RealType one( columns_view[ globalIdx ] != paddingIndex_ );
+         function( rowIdx, localIdx, columns_view[ globalIdx ], one, compute );
+      }
+      else
+         function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
+   };
+   this->segments.forSegments( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+/*template< typename Real,
+          template< typename, typename > class SegmentsView,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+addMatrix( const SparseMatrixView< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+
+}
+
+template< typename Real,
+          template< typename, typename > class SegmentsView,
+          typename Device,
+          typename Index,
+          typename RealAllocator,
+          typename IndexAllocator >
+template< typename Real2, typename Index2 >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getTransposition( const SparseMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+template< typename Vector1, typename Vector2 >
+bool
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   return false;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >&
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix )
+{
+   MatrixView< Real, Device, Index >::operator=( matrix );
+   this->columnIndexes.bind( matrix.columnIndexes );
+   this->segments = matrix.segments;
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+save( File& file ) const
+{
+   MatrixView< RealType, DeviceType, IndexType >::save( file );
+   file << this->columnIndexes;
+   this->segments.save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+print( std::ostream& str ) const
+{
+   if( isSymmetric() )
+   {
+      for( IndexType row = 0; row < this->getRows(); row++ )
+      {
+         str <<"Row: " << row << " -> ";
+         for( IndexType column = 0; column < this->getColumns(); column++ )
+         {
+            auto value = this->getElement( row, column );
+            if( value != ( RealType ) 0 )
+               str << " Col:" << column << "->" << value << "\t";
+         }
+         str << std::endl;
+      }
+   }
+   else
+      for( IndexType row = 0; row < this->getRows(); row++ )
+      {
+         str <<"Row: " << row << " -> ";
+         const auto rowLength = this->segments.getSegmentSize( row );
+         for( IndexType i = 0; i < rowLength; i++ )
+         {
+            const IndexType globalIdx = this->segments.getGlobalIndex( row, i );
+            const IndexType column = this->columnIndexes.getElement( globalIdx );
+            if( column == this->getPaddingIndex() )
+               break;
+            RealType value;
+            if( isBinary() )
+               value = ( RealType ) 1.0;
+            else
+               value = this->values.getElement( globalIdx );
+            str << " Col:" << column << "->" << value << "\t";
+         }
+         str << std::endl;
+      }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+__cuda_callable__
+Index
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+   } //namespace Matrices
+} // namespace  TNL
diff --git a/src/TNL/Matrices/ThreePartVector.h b/src/TNL/Matrices/ThreePartVector.h
index f57e3e116a13e6f01d128284422a08f1c719c27c..f28f544f5bac6eeceb61d01ef49852fd1b36b6af 100644
--- a/src/TNL/Matrices/ThreePartVector.h
+++ b/src/TNL/Matrices/ThreePartVector.h
@@ -75,6 +75,17 @@ public:
          return right[ i - left.getSize() - middle.getSize() ];
    }
 
+   __cuda_callable__
+   const Real* getPointer( Index i ) const
+   {
+      if( i < left.getSize() )
+         return &left.getData()[ i ];
+      else if( i < left.getSize() + middle.getSize() )
+         return &middle.getData()[ i - left.getSize() ];
+      else
+         return &right.getData()[ i - left.getSize() - middle.getSize() ];
+   }
+
    friend std::ostream& operator<<( std::ostream& str, const ThreePartVectorView& v )
    {
       str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]";
@@ -143,6 +154,17 @@ public:
          return right[ i - left.getSize() - middle.getSize() ];
    }
 
+   __cuda_callable__
+   const Real* getPointer( Index i ) const
+   {
+      if( i < left.getSize() )
+         return &left.getData()[ i ];
+      else if( i < left.getSize() + middle.getSize() )
+         return &middle.getData()[ i - left.getSize() ];
+      else
+         return &right.getData()[ i - left.getSize() - middle.getSize() ];
+   }
+
    friend std::ostream& operator<<( std::ostream& str, const ThreePartVector& v )
    {
       str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]";
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 3f57fe1c3e6de1cf0e608cd68b5846eb711e321d..0297936810fc61a6b81ac21b03f49a87818eb53c 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -12,198 +12,185 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
 
 namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
+namespace Matrices {
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
-class Tridiagonal : public Matrix< Real, Device, Index >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
+class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using RealAllocatorType = RealAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Tridiagonal< _Real, _Device, _Index >;
 
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Tridiagonal;
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
 
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef TridiagonalRow< Real, Index > MatrixRow;
+      Tridiagonal();
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Tridiagonal< _Real, _Device, _Index >;
+      Tridiagonal( const IndexType rows, const IndexType columns );
 
-   Tridiagonal();
+      ViewType getView() const; // TODO: remove const
 
-   static String getSerializationType();
+      //ConstViewType getConstView() const;
 
-   virtual String getSerializationTypeVirtual() const;
+      static String getSerializationType();
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      virtual String getSerializationTypeVirtual() const;
 
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   IndexType getRowLength( const IndexType row ) const;
+      //template< typename Vector >
+      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   IndexType getMaxRowLength() const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      IndexType getMaxRowLength() const;
 
-   IndexType getNumberOfMatrixElements() const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   IndexType getMaxRowlength() const;
+      void reset();
 
-   void reset();
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
 
-   void setValue( const RealType& v );
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
+      void setValue( const RealType& v );
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   __cuda_callable__
-   RealType getElementFast( const IndexType row,
-                            const IndexType column ) const;
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   RealType getElement( const IndexType row,
-                        const IndexType column ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Real2, typename Index2 >
+      void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   template< typename InVector,
-             typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   template< typename Real2, typename Index2 >
-   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      // copy assignment
+      Tridiagonal& operator=( const Tridiagonal& matrix );
 
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      // cross-device copy assignment
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix );
 
-   template< typename Vector1, typename Vector2 >
-   __cuda_callable__
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      void save( File& file ) const;
 
-   // copy assignment
-   Tridiagonal& operator=( const Tridiagonal& matrix );
+      void load( File& file );
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      void save( const String& fileName ) const;
 
-   void save( File& file ) const;
+      void load( const String& fileName );
 
-   void load( File& file );
+      void print( std::ostream& str ) const;
 
-   void save( const String& fileName ) const;
+      const IndexerType& getIndexer() const;
 
-   void load( const String& fileName );
+      IndexerType& getIndexer();
 
-   void print( std::ostream& str ) const;
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
 
-protected:
+   protected:
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
 
-   Containers::Vector< RealType, DeviceType, IndexType > values;
+      IndexerType indexer;
 
-   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class TridiagonalDeviceDependentCode< DeviceType >;
+      ViewType view;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Tridiagonal_impl.h>
+#include <TNL/Matrices/Tridiagonal.hpp>
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ddabc9852f6eeafe82b034f34917b916cec2ce2
--- /dev/null
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -0,0 +1,813 @@
+/***************************************************************************
+                          Tridiagonal.hpp  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <sstream>
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Device >
+class TridiagonalDeviceDependentCode;
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Tridiagonal()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+Tridiagonal( const IndexType rows, const IndexType columns )
+{
+   this->setDimensions( rows, columns );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getView() const -> ViewType
+{
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer );
+}
+
+/*template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}*/
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+String
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setDimensions( const IndexType rows, const IndexType columns )
+{
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->indexer.setDimensions( rows, columns );
+   this->values.setSize( this->indexer.getStorageSize() );
+   this->values = 0.0;
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+ //  template< typename Vector >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
+{
+   if( max( rowLengths ) > 3 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( rowLengths.getElement( 0 ) > 2 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   if( this->getRows() > this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() == this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() < this->getColumns() )
+      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   return this->view.getCompressedRowLengths( rowLengths );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRowLength( const IndexType row ) const
+{
+   return this->view.getRowLength( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getMaxRowLength() const
+{
+   return this->view.getMaxRowLength();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
+{
+   this->setDimensions( m.getRows(), m.getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getNumberOfNonzeroMatrixElements() const
+{
+   return this->view.getNumberOfNonzeroMatrixElements();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+reset()
+{
+   Matrix< Real, Device, Index >::reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+bool
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setValue( const RealType& v )
+{
+   this->view.setValue( v );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return this->view.getRow( rowIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   this->view.setElement( row, column, value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   this->view.addElement( row, column, value, thisElementMultiplicator );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Real
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   return this->view.getElement( row, column );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+  template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function ) const
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function )
+{
+   this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+   return this->view.rowVectorProduct();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename InVector,
+             typename OutVector >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   this->view.vectorProduct( inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real2, typename Index2 >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                                                                    const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
+{
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
+                  "unknown device" );
+
+   this->setLike( matrix );
+   if( RowMajorOrder == RowMajorOrder_ )
+      this->values = matrix.getValues();
+   else
+   {
+      if( std::is_same< Device, Device_ >::value )
+      {
+         const auto matrix_view = matrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+      else
+      {
+         Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix;
+         auxMatrix = matrix;
+         const auto matrix_view = auxMatrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+   }
+   return *this;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+   this->indexer.setDimensions( this->getRows(), this->getColumns() );
+   this->view = this->getView();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+print( std::ostream& str ) const
+{
+   this->view.print( str );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+Index
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getPaddingIndex() const
+{
+   return this->view.getPaddingIndex();
+}
+
+/*
+template<>
+class TridiagonalDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return 2*row + column;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType  >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ 1 ];
+         Index i = 3 * row;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i - 1 ] +
+                   vector[ row ] * values[ i ];
+         return vector[ row - 1 ] * values[ i - 1 ] +
+                vector[ row ] * values[ i ] +
+                vector[ row + 1 ] * values[ i + 1 ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class TridiagonalDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return ( column - row + 1 )*rows + row - 1;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ rows - 1 ];
+         Index i = row - 1;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i ] +
+                   vector[ row ] * values[ i + rows ];
+         return vector[ row - 1 ] * values[ i ] +
+                vector[ row ] * values[ i + rows ] +
+                vector[ row + 1 ] * values[ i + 2*rows ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+ */
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h
new file mode 100644
index 0000000000000000000000000000000000000000..e77d826e052ad5bad9d5dec95dd05059e57afe92
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h
@@ -0,0 +1,59 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.h  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView,
+          typename Indexer >
+class TridiagonalMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using IndexType = typename ValuesView::IndexType;
+      using ValuesViewType = ValuesView;
+      using IndexerType = Indexer;
+
+      __cuda_callable__
+      TridiagonalMatrixRowView( const IndexType rowIdx,
+                                const ValuesViewType& values,
+                                const IndexerType& indexer );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const RealType& value );
+   protected:
+
+      IndexType rowIdx;
+
+      ValuesViewType values;
+
+      Indexer indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/TridiagonalMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..80fc1a26d52c32b60d1e184ee0beb87ef908c687
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -0,0 +1,75 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.hpp  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+TridiagonalMatrixRowView( const IndexType rowIdx,
+                          const ValuesViewType& values,
+                          const IndexerType& indexer )
+: rowIdx( rowIdx ), values( values ), indexer( indexer )
+{
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getSize() const -> IndexType
+{
+   return indexer.getRowSize( rowIdx );
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType
+{
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return rowIdx + localIdx - 1;
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+void 
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+setElement( const IndexType localIdx,
+            const RealType& value )
+{
+   this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..82b76c73f76f2695c5eb3dacdcf685fad4466fc1
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -0,0 +1,169 @@
+/***************************************************************************
+                          TridiagonalMatrixView.h  -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/MatrixView.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
+{
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
+
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value >
+      using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >;
+
+      TridiagonalMatrixView();
+
+      TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      static String getSerializationType();
+
+      virtual String getSerializationTypeVirtual() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      IndexType getMaxRowLength() const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      void setValue( const RealType& v );
+
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      void save( File& file ) const;
+
+      void save( const String& fileName ) const;
+
+      void print( std::ostream& str ) const;
+
+      __cuda_callable__
+      const IndexerType& getIndexer() const;
+
+      __cuda_callable__
+      IndexerType& getIndexer();
+
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
+
+      IndexerType indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/TridiagonalMatrixView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..99e3e87d4ab189e50d26dc363d78313723ea930c
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -0,0 +1,705 @@
+/***************************************************************************
+                          TridiagonalMatrixView.hpp  -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer )
+: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
+{
+   return ViewType( this->values.getView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRowLength( const IndexType row ) const
+{
+   return this->indexer.getRowSize( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getMaxRowLength() const
+{
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   return ! this->operator==( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setValue( const RealType& v )
+{
+   this->values = v;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return RowView( rowIdx, this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return RowView( rowIdx, this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+   this->values.setElement( this->getElementIndex( row, column ), value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+   {
+      std::stringstream msg;
+      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
+      throw std::logic_error( msg.str() );
+   }
+   const Index i = this->getElementIndex( row, column );
+   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+Real
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row, const IndexType column ) const
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
+{
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
+   const auto values_view = this->values.getConstView();
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      Real_ sum( zero );
+      if( rowIdx == 0 )
+      {
+         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
+         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         keep( 0, sum );
+         return;
+      }
+      if( rowIdx + 1 < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
+         keep( rowIdx, sum );
+         return;
+      }
+      if( rowIdx < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         keep( rowIdx, sum );
+      }
+      else
+      {
+         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer = this->indexer;
+   bool compute( true );
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      if( rowIdx == 0 )
+      {
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
+      }
+      else if( rowIdx < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+      }
+      else
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+  template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   const auto indexer = this->indexer;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      if( rowIdx == 0 )
+      {
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+      }
+      else if( rowIdx < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+      }
+      else
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename InVector,
+             typename OutVector >
+void 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
+
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      if( thisMatrixMultiplicator == 1.0 )
+         this->values += matrixMultiplicator * matrix.getValues();
+      else
+         this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues();
+   }
+   else
+   {
+      const auto matrix_view = matrix;
+      const auto matrixMult = matrixMultiplicator;
+      const auto thisMult = thisMatrixMultiplicator;
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      if( thisMult == 0.0 )
+         this->forAllRows( add0 );
+      else if( thisMult == 1.0 )
+         this->forAllRows( add1 );
+      else
+         this->forAllRows( addGen );
+   }
+}
+
+#ifdef HAVE_CUDA
+/*template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}*/
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real2, typename Index2 >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      /*Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;*/
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
+{
+   MatrixView< Real, Device, Index >::save( file );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = row - 1; column < row + 2; column++ )
+         if( column >= 0 && column < this->columns )
+         {
+            auto v = this->getElement( row, column );
+            if( v )
+               str << " Col:" << column << "->" << v << "\t";
+         }
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElementIndex( const IndexType row, const IndexType column ) const
+{
+   IndexType localIdx = column - row;
+   if( row > 0 )
+      localIdx++;
+
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+
+   return this->indexer.getGlobalIndex( row, localIdx );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getPaddingIndex() const
+{
+   return -1;
+}
+
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalRow.h b/src/TNL/Matrices/TridiagonalRow.h
deleted file mode 100644
index 9d06b39e18f8914957852694a6b4fd98d42e0f33..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/TridiagonalRow.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-class TridiagonalRow
-{
-   public:
-
-      __cuda_callable__
-      TridiagonalRow();
-
-      __cuda_callable__
-      TridiagonalRow( Real* values,
-                               const Index row,
-                               const Index columns,
-                               const Index step );
-
-      __cuda_callable__
-      void bind( Real* values,
-                 const Index row,
-                 const Index columns,
-                 const Index step );
-
-      __cuda_callable__
-      void setElement( const Index& elementIndex,
-                       const Index& column,
-                       const Real& value );
-
-   protected:
-
-      Real* values;
-
-      Index row, columns, step;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/TridiagonalRow_impl.h>
diff --git a/src/TNL/Matrices/TridiagonalRow_impl.h b/src/TNL/Matrices/TridiagonalRow_impl.h
deleted file mode 100644
index f5b7e842a4c4b69c77aa11f2ee09984eb46f9808..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/TridiagonalRow_impl.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow_impl.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow()
-: values( 0 ),
-  row( 0 ),
-  columns( 0 ),
-  step( 0 )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow( Real* values,
-                         const Index row,
-                         const Index columns,
-                         const Index step )
-: values( values ),
-  row( row ),
-  columns( columns ),
-  step( step )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-bind( Real* values,
-      const Index row,
-      const Index columns,
-      const Index step )
-{
-   this->values = values;
-   this->row = row;
-   this->columns = columns;
-   this->step = step;
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-setElement( const Index& elementIndex,
-            const Index& column,
-            const Real& value )
-{
-   TNL_ASSERT( this->values, );
-   TNL_ASSERT( this->step > 0,);
-   TNL_ASSERT( column >= 0 && column < this->columns,
-              std::cerr << "column = " << columns << " this->columns = " << this->columns );
-   TNL_ASSERT( abs( column - row ) <= 1,
-              std::cerr << "column = " << column << " row =  " << row );
-
-   /****
-    * this->values stores an adress of the diagonal element
-    */
-   this->values[ ( column - row ) * this->step ] = value;
-}
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h
deleted file mode 100644
index 2752f6850320035dca48169c5e1ae2806aa47ff5..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/Tridiagonal_impl.h
+++ /dev/null
@@ -1,759 +0,0 @@
-/***************************************************************************
-                          Tridiagonal_impl.h  -  description
-                             -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Assert.h>
-#include <TNL/Matrices/Tridiagonal.h>
-#include <TNL/Exceptions/NotImplementedError.h>
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >::Tridiagonal()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Tridiagonal< " ) +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationType()
-{
-   return String( "Matrices::Tridiagonal< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
-{
-   return this->getSerializationType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                        const IndexType columns )
-{
-   Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   values.setSize( 3*min( rows, columns ) );
-   this->values.setValue( 0.0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
-{
-   if( rowLengths[ 0 ] > 2 )
-      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   for( Index i = 1; i < diagonalLength-1; i++ )
-      if( rowLengths[ i ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() > this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 1 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() == this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 2 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() < this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
-{
-   return this->getRowLengthFast( row );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
-{
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   if( row == 0 )
-      return 2;
-   if( row > 0 && row < diagonalLength - 1 )
-      return 3;
-   if( this->getRows() > this->getColumns() )
-      return 1;
-   if( this->getRows() == this->getColumns() )
-      return 2;
-   return 3;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
-{
-   return 3;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
-{
-   this->setDimensions( m.getRows(), m.getColumns() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
-{
-   return 3 * min( this->getRows(), this->getColumns() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
-{
-   IndexType nonzeroElements = 0;
-   for( IndexType i = 0; i < this->values.getSize(); i++ )
-      if( this->values.getElement( i ) != 0 )
-         nonzeroElements++;
-   return nonzeroElements;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index
-Tridiagonal< Real, Device, Index >::
-getMaxRowlength() const
-{
-   return 3;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::reset()
-{
-   Matrix< Real, Device, Index >::reset();
-   this->values.reset();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
-{
-   return this->values == matrix.values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
-{
-   return this->values != matrix.values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
-{
-   this->values.setValue( v );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value )
-{
-   this->values[ this->getElementIndex( row, column ) ] = value;
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value )
-{
-   this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value,
-                                                                  const RealType& thisElementMultiplicator )
-{
-   const Index i = this->getElementIndex( row, column );
-   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value,
-                                                              const RealType& thisElementMultiplicator )
-{
-   const Index i = this->getElementIndex( row, column );
-   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRowFast( row, columns, values, elements, 0.0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRow( row, columns, values, elements, 0.0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements,
-                                                              const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType& column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElementFast( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements,
-                                                          const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElement( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
-                                                                  const IndexType column ) const
-{
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values[ this->getElementIndex( row, column ) ];
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
-                                                              const IndexType column ) const
-{
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
-                                                              IndexType* columns,
-                                                              RealType* values ) const
-{
-   IndexType elementPointer( 0 );
-   for( IndexType i = -1; i <= 1; i++ )
-   {
-      const IndexType column = row + 1;
-      if( column >= 0 && column < this->getColumns() )
-      {
-         columns[ elementPointer ] = column;
-         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
-         elementPointer++;
-      }
-   }
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex )
-{
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        this->rows );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-const typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex ) const
-{
-   throw Exceptions::NotImplementedError();
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-template< typename Vector >
-__cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
-                                                                                         const Vector& vector ) const
-{
-   return TridiagonalDeviceDependentCode< Device >::
-             rowVectorProduct( this->rows,
-                               this->values,
-                               row,
-                               vector );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename InVector,
-             typename OutVector >
-void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
-                                                                 OutVector& outVector ) const
-{
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
-
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                    const RealType& matrixMultiplicator,
-                                                    const RealType& thisMatrixMultiplicator )
-{
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
-                 << "This matrix rows: " << this->getRows() << std::endl );
-
-   if( thisMatrixMultiplicator == 1.0 )
-      this->values += matrixMultiplicator * matrix.values;
-   else
-      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
-}
-
-#ifdef HAVE_CUDA
-template< typename Real,
-          typename Real2,
-          typename Index,
-          typename Index2 >
-__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
-                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
-                                                             const Real matrixMultiplicator,
-                                                             const Index gridIdx )
-{
-   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx < inMatrix->getRows() )
-   {
-      if( rowIdx > 0 )
-        outMatrix->setElementFast( rowIdx-1,
-                                   rowIdx,
-                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
-      outMatrix->setElementFast( rowIdx,
-                                 rowIdx,
-                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
-      if( rowIdx < inMatrix->getRows()-1 )
-         outMatrix->setElementFast( rowIdx+1,
-                                    rowIdx,
-                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
-   }
-}
-#endif
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                                    const RealType& matrixMultiplicator )
-{
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-               std::cerr << "This matrix rows: " << this->getRows() << std::endl
-                    << "That matrix rows: " << matrix.getRows() << std::endl );
-   if( std::is_same< Device, Devices::Host >::value )
-   {
-      const IndexType& rows = matrix.getRows();
-      for( IndexType i = 1; i < rows; i++ )
-      {
-         RealType aux = matrix. getElement( i, i - 1 );
-         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
-         this->setElement( i, i, matrix.getElement( i, i ) );
-         this->setElement( i - 1, i, aux );
-      }
-   }
-   if( std::is_same< Device, Devices::Cuda >::value )
-   {
-#ifdef HAVE_CUDA
-      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
-      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
-      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
-      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
-      {
-         if( gridIdx == cudaGrids - 1 )
-            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
-                                                    ( kernel_inMatrix,
-                                                      kernel_this,
-                                                      matrixMultiplicator,
-                                                      gridIdx );
-      }
-      Cuda::freeFromDevice( kernel_this );
-      Cuda::freeFromDevice( kernel_inMatrix );
-      TNL_CHECK_CUDA_DEVICE;
-#endif
-   }
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Vector1, typename Vector2 >
-__cuda_callable__
-void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
-                                                              const IndexType row,
-                                                              Vector2& x,
-                                                              const RealType& omega ) const
-{
-   RealType sum( 0.0 );
-   if( row > 0 )
-      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
-   if( row < this->getColumns() - 1 )
-      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
-   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
-}
-
-
-// copy assignment
-template< typename Real,
-          typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
-{
-   this->setLike( matrix );
-   this->values = matrix.values;
-   return *this;
-}
-
-// cross-device copy assignment
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
-{
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
-                  "unknown device" );
-
-   this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
-}
-
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( File& file ) const
-{
-   Matrix< Real, Device, Index >::save( file );
-   file << this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( File& file )
-{
-   Matrix< Real, Device, Index >::load( file );
-   file >> this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
-{
-   Object::save( fileName );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( const String& fileName )
-{
-   Object::load( fileName );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
-{
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      for( IndexType column = row - 1; column < row + 2; column++ )
-         if( column >= 0 && column < this->columns )
-            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
-      str << std::endl;
-   }
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
-                                                                    const IndexType column ) const
-{
-   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
-              std::cerr << " this->rows = " << this->rows
-                   << " row = " << row << " column = " << column );
-   TNL_ASSERT( abs( row - column ) < 2,
-              std::cerr << "row = " << row << " column = " << column << std::endl );
-   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
-}
-
-template<>
-class TridiagonalDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return 2*row + column;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType  >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ 1 ];
-         Index i = 3 * row;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i - 1 ] +
-                   vector[ row ] * values[ i ];
-         return vector[ row - 1 ] * values[ i - 1 ] +
-                vector[ row ] * values[ i ] +
-                vector[ row + 1 ] * values[ i + 1 ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class TridiagonalDeviceDependentCode< Devices::Cuda >
-{
-   public:
- 
-      typedef Devices::Cuda Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return ( column - row + 1 )*rows + row - 1;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ rows - 1 ];
-         Index i = row - 1;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i ] +
-                   vector[ row ] * values[ i + rows ];
-         return vector[ row - 1 ] * values[ i ] +
-                vector[ row ] * values[ i + rows ] +
-                vector[ row + 1 ] * values[ i + 2*rows ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..96930b38660e6a853b59fa27ca59236a04a0397f
--- /dev/null
+++ b/src/TNL/Matrices/details/DenseMatrix.h
@@ -0,0 +1,320 @@
+/***************************************************************************
+                          DenseMatrix.h  -  description
+                             -------------------
+    begin                : Jan 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Device >
+class DenseDeviceDependentCode;
+template<>
+class DenseDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class DenseDeviceDependentCode< Devices::Cuda >
+{
+   public:
+
+      typedef Devices::Cuda Device;
+
+      template< typename Real,
+                typename Index,
+                bool RowMajorOrder,
+                typename RealAllocator,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix1,
+          typename Matrix2,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void
+DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix,
+                          const Matrix1* matrixA,
+                          const Matrix2* matrixB,
+                          const Real matrixAMultiplicator,
+                          const Real matrixBMultiplicator,
+                          const Index gridIdx_x,
+                          const Index gridIdx_y )
+{
+   /****
+    * Here we compute product C = A * B. To profit from the fast
+    * shared memory we do it by tiles.
+    */
+
+   typedef Index IndexType;
+   typedef Real RealType;
+   __shared__ Real tileA[ tileDim*tileDim ];
+   __shared__ Real tileB[ tileDim*tileDim ];
+   __shared__ Real tileC[ tileDim*tileDim ];
+
+   const IndexType& matrixARows = matrixA->getRows();
+   const IndexType& matrixAColumns = matrixA->getColumns();
+   const IndexType& matrixBRows = matrixB->getRows();
+   const IndexType& matrixBColumns = matrixB->getColumns();
+
+   /****
+    * Reset the tile C
+    */
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0;
+
+   /****
+    * Compute the result tile coordinates
+    */
+   const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim;
+   const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim;
+
+   /****
+    * Sum over the matrix tiles
+    */
+   for( IndexType i = 0; i < matrixAColumns; i += tileDim )
+   {
+      for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+      {
+         const IndexType matrixARow = resultTileRow + threadIdx.y + row;
+         const IndexType matrixAColumn = i + threadIdx.x;
+         if( matrixARow < matrixARows && matrixAColumn < matrixAColumns )
+            tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixAMultiplicator * matrixA->getElementFast( matrixARow,  matrixAColumn );
+
+         const IndexType matrixBRow = i + threadIdx.y + row;
+         const IndexType matrixBColumn = resultTileColumn + threadIdx.x;
+         if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns )
+            tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] =
+               matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn );
+      }
+      __syncthreads();
+
+      const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow );
+      const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i );
+      const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i );
+      const IndexType tileBLastColumn =
+         tnlCudaMin( tileDim, matrixBColumns - resultTileColumn );
+
+      for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize )
+      {
+         RealType sum( 0.0 );
+         for( IndexType j = 0; j < tileALastColumn; j++ )
+            sum += tileA[ ( threadIdx.y + row )*tileDim + j ]*
+                      tileB[ j*tileDim + threadIdx.x ];
+         tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum;
+      }
+      __syncthreads();
+   }
+
+   /****
+    * Write the result tile to the result matrix
+    */
+   const IndexType& matrixCRows = resultMatrix->getRows();
+   const IndexType& matrixCColumns = resultMatrix->getColumns();
+   for( IndexType row = 0; row < tileDim; row += tileRowBlockSize )
+   {
+      const IndexType matrixCRow = resultTileRow + row + threadIdx.y;
+      const IndexType matrixCColumn = resultTileColumn + threadIdx.x;
+      if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns )
+         resultMatrix->setElementFast( matrixCRow,
+                                       matrixCColumn,
+                                       tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] );
+   }
+
+}
+
+template< typename Real,
+          typename Index,
+          typename Matrix,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                          const Matrix* inputMatrix,
+                                                          const Real matrixMultiplicator,
+                                                          const Index gridIdx_x,
+                                                          const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   for( Index rowBlock = 0;
+        rowBlock < tileDim;
+        rowBlock += tileRowBlockSize )
+   {
+      resultMatrix->setElementFast( writeColumnPosition,
+                                    writeRowPosition + rowBlock,
+                                    matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+
+   }
+
+}
+
+template< typename Real,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator,
+          typename Matrix,
+          int tileDim,
+          int tileRowBlockSize >
+__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix,
+                                                             const Matrix* inputMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx_x,
+                                                             const Index gridIdx_y )
+{
+   __shared__ Real tile[ tileDim*tileDim ];
+
+   const Index columns = inputMatrix->getColumns();
+   const Index rows = inputMatrix->getRows();
+
+   /****
+    * Diagonal mapping of the CUDA blocks
+    */
+   Index blockIdx_x, blockIdx_y;
+   if( columns == rows )
+   {
+      blockIdx_y = blockIdx.x;
+      blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x;
+   }
+   else
+   {
+      Index bID = blockIdx.x + gridDim.x*blockIdx.y;
+      blockIdx_y = bID % gridDim.y;
+      blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x;
+   }
+
+   /****
+    * Read the tile to the shared memory
+    */
+   const Index readRowPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y;
+   const Index readColumnPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x;
+   if( readColumnPosition < columns )
+   {
+      const Index readOffset = readRowPosition * columns + readColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( readRowPosition + rowBlock < rows )
+            tile[ Cuda::getInterleaving( threadIdx.x*tileDim +  threadIdx.y + rowBlock ) ] =
+               inputMatrix->getElementFast( readColumnPosition,
+                                            readRowPosition + rowBlock );
+      }
+   }
+   __syncthreads();
+
+   /****
+    * Write the tile to the global memory
+    */
+   const Index writeRowPosition =
+      ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y;
+   const Index writeColumnPosition =
+      ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x;
+   if( writeColumnPosition < rows )
+   {
+      const Index writeOffset = writeRowPosition * rows + writeColumnPosition;
+      for( Index rowBlock = 0;
+           rowBlock < tileDim;
+           rowBlock += tileRowBlockSize )
+      {
+         if( writeRowPosition + rowBlock < columns )
+            resultMatrix->setElementFast( writeColumnPosition,
+                                          writeRowPosition + rowBlock,
+                                          matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] );
+      }
+   }
+
+}
+
+#endif
+
+      } //namespace details
+   } //namepsace Matrices
+} //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
new file mode 100644
index 0000000000000000000000000000000000000000..3597c30f7d3eec37ef85b050cb01963e5f34715a
--- /dev/null
+++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
@@ -0,0 +1,109 @@
+/***************************************************************************
+                          MultidiagonalMatrixIndexer.h  -  description
+                             -------------------
+    begin                : Jan 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Index,
+          bool RowMajorOrder >
+class MultidiagonalMatrixIndexer
+{
+   public:
+
+      using IndexType = Index;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer()
+      : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){};
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer( const IndexType& rows,
+                                  const IndexType& columns,
+                                  const IndexType& diagonals,
+                                  const IndexType& nonemptyRows )
+      : rows( rows ), 
+        columns( columns ),
+        diagonals( diagonals ),
+        nonemptyRows( nonemptyRows ) {};
+
+      __cuda_callable__
+      MultidiagonalMatrixIndexer( const MultidiagonalMatrixIndexer& indexer )
+      : rows( indexer.rows ),
+        columns( indexer.columns ),
+        diagonals( indexer.diagonals ),
+        nonemptyRows( indexer.nonemptyRows ) {};
+
+      void set( const IndexType& rows,
+                const IndexType& columns,
+                const IndexType& diagonals,
+                const IndexType& nonemptyRows )
+      {
+         this->rows = rows;
+         this->columns = columns;
+         this->diagonals = diagonals;
+         this->nonemptyRows = nonemptyRows;
+      };
+
+      /*__cuda_callable__
+      IndexType getRowSize( const IndexType rowIdx ) const
+      {
+         if( rowIdx == 0 )
+            return 2;
+         if( columns <= rows )
+         {
+            if( rowIdx == columns - 1 )
+               return 2;
+            if( rowIdx == columns )
+               return 1;
+         }
+         return 3;
+      };*/
+
+      __cuda_callable__
+      const IndexType& getRows() const { return this->rows; };
+
+      __cuda_callable__
+      const IndexType& getColumns() const { return this->columns; };
+
+      __cuda_callable__
+      const IndexType& getDiagonals() const { return this->diagonals; };
+
+      __cuda_callable__
+      const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; };
+
+      __cuda_callable__
+      IndexType getStorageSize() const { return diagonals * this->nonemptyRows; };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
+      {
+         TNL_ASSERT_GE( localIdx, 0, "" );
+         TNL_ASSERT_LT( localIdx, diagonals, "" );
+         TNL_ASSERT_GE( rowIdx, 0, "" );
+         TNL_ASSERT_LT( rowIdx, this->rows, "" );
+         
+         if( RowMajorOrder )
+            return diagonals * rowIdx + localIdx;
+         else
+            return localIdx * nonemptyRows + rowIdx;
+      };
+
+      protected:
+
+         IndexType rows, columns, diagonals, nonemptyRows;
+};
+      } //namespace details
+   } // namespace Materices
+} // namespace TNL
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
new file mode 100644
index 0000000000000000000000000000000000000000..64beb44f736152c1ddb57e2b2647e2e1cd8f8870
--- /dev/null
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -0,0 +1,92 @@
+/***************************************************************************
+                          TridiagonalMatrixIndexer.h  -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Index,
+          bool RowMajorOrder >
+class TridiagonalMatrixIndexer
+{
+   public:
+
+      using IndexType = Index;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer()
+      : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns )
+      : rows( rows ), columns( columns ), nonemptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer )
+      : rows( indexer.rows ), columns( indexer.columns ), nonemptyRows( indexer.nonemptyRows ) {};
+
+      void setDimensions( const IndexType& rows, const IndexType& columns )
+      {
+         this->rows = rows;
+         this->columns = columns;
+         this->nonemptyRows = min( rows, columns ) + ( rows > columns );
+      };
+
+      __cuda_callable__
+      IndexType getRowSize( const IndexType rowIdx ) const
+      {
+         if( rowIdx == 0 )
+            return 2;
+         if( columns <= rows )
+         {
+            if( rowIdx == columns - 1 )
+               return 2;
+            if( rowIdx == columns )
+               return 1;
+         }
+         return 3;
+      };
+
+      __cuda_callable__
+      const IndexType& getRows() const { return this->rows; };
+
+      __cuda_callable__
+      const IndexType& getColumns() const { return this->columns; };
+
+      __cuda_callable__
+      const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; };
+      __cuda_callable__
+      IndexType getStorageSize() const { return 3 * this->nonemptyRows; };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
+      {
+         TNL_ASSERT_GE( localIdx, 0, "" );
+         TNL_ASSERT_LT( localIdx, 3, "" );
+         TNL_ASSERT_GE( rowIdx, 0, "" );
+         TNL_ASSERT_LT( rowIdx, this->rows, "" );
+
+         if( RowMajorOrder )
+            return 3 * rowIdx + localIdx;
+         else
+            return localIdx * nonemptyRows + rowIdx;
+      };
+
+      protected:
+
+         IndexType rows, columns, nonemptyRows;
+};
+      } //namespace details
+   } // namespace Materices
+} // namespace TNL
diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h
index 26df28965ec42e855fd034de7dea748999381e67..6a89742270b7f683764184227020351069059bfa 100644
--- a/src/TNL/Problems/HeatEquationProblem.h
+++ b/src/TNL/Problems/HeatEquationProblem.h
@@ -18,7 +18,7 @@
 
 #include <TNL/Problems/PDEProblem.h>
 #include <TNL/Operators/diffusion/LinearDiffusion.h>
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Timer.h>
 #include <TNL/Solvers/PDE/ExplicitUpdater.h>
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index bc339e9b3ba56eb9e4d3499d4954be57cda7d864..98cd6d5e4f0f74b797fb88b8b88c83079aee76ee 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -18,7 +18,7 @@
 
 #include <TNL/FileName.h>
 #include <TNL/Matrices/MatrixSetter.h>
-#include <TNL/Matrices/MultidiagonalMatrixSetter.h>
+#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h>
 #include <TNL/Logger.h>
 #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h>
 
diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h
index 69d95aaeee7e5db273940602b7f192c0b75b2591..c81ffdd396a6caa3b535b942572c65b65749f94c 100644
--- a/src/TNL/Problems/PDEProblem.h
+++ b/src/TNL/Problems/PDEProblem.h
@@ -13,7 +13,7 @@
 #include <TNL/Problems/Problem.h>
 #include <TNL/Problems/CommonData.h>
 #include <TNL/Pointers/SharedPointer.h>
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 
 namespace TNL {
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
index d5127fab5dec1e67a97d254f57e81f8d49e3d847..1f2b9f1981c837108be68e0e2864c69537afaf2f 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -15,7 +15,7 @@
 #include "Preconditioner.h"
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 #include <TNL/Pointers/UniquePointer.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
index cce3dc5c4bde030dc33c4762623124e1d3f65367..6a4a4a83b52fd393bbde144dc5591c7583f4c1e7 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
@@ -15,7 +15,7 @@
 #include "Preconditioner.h"
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 namespace TNL {
 namespace Solvers {
diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h
index 9d3515157feeac58a73b56353274524a56f6ec1a..70e7737eee5292617355f7899ecc526d439de2c0 100644
--- a/src/TNL/Solvers/SolverConfig_impl.h
+++ b/src/TNL/Solvers/SolverConfig_impl.h
@@ -16,8 +16,7 @@
 #include <TNL/Solvers/PDE/ExplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
 #include <TNL/Solvers/LinearSolverTypeResolver.h>
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Meshes/DistributedMeshes/DistributedGrid.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 namespace TNL {
 namespace Solvers {
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index ef3119365f50444d89154e487a65a12464849062..255a67fb911b995ea409c341fe9a00104b5d95bf 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -135,6 +135,11 @@ TYPED_TEST( ArrayTest, constructors )
    v = 0;
    EXPECT_EQ( v.getSize(), 10 );
 
+   ArrayType vv( 10, 4 );
+   EXPECT_EQ( vv.getSize(), 10 );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( vv.getElement( i ), 4 );
+
    // deep copy
    ArrayType w( v );
    EXPECT_NE( w.getData(), v.getData() );
diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index 9f27aaa86078937540ccc1716069faec0f69b424..227a8655157bbaa0beff7aeac668e592d1f01456 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -1,3 +1,5 @@
+ADD_SUBDIRECTORY( Segments )
+
 ADD_EXECUTABLE( ArrayTest ArrayTest.cpp )
 TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} )
diff --git a/src/UnitTests/Containers/DistributedArrayTest.h b/src/UnitTests/Containers/DistributedArrayTest.h
index f4bd35830369a4251797bb363472a7b51e536878..204bc6fe753c9f75b55bd3523eb1f708faf0b857 100644
--- a/src/UnitTests/Containers/DistributedArrayTest.h
+++ b/src/UnitTests/Containers/DistributedArrayTest.h
@@ -94,7 +94,7 @@ TYPED_TEST( DistributedArrayTest, copyFromGlobal )
    ArrayViewType localArrayView = this->distributedArray.getLocalView();
    auto globalView = globalArray.getConstView();
    const auto localRange = this->distributedArray.getLocalRange();
-   globalView.bind( &globalArray[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() );
+   globalView.bind( &globalArray.getData()[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() );
    EXPECT_EQ( localArrayView, globalView );
 }
 
diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..742fb69ef3060451f54bacafdb7fc1ca0d49a64d
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/CMakeLists.txt
@@ -0,0 +1,52 @@
+IF( BUILD_CUDA )
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   CUDA_ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ELSE(  BUILD_CUDA )
+#   ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+#   ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cpp )
+#   TARGET_COMPILE_OPTIONS( SegmentsTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+ENDIF( BUILD_CUDA )
+
+
+#ADD_TEST( SegmentsTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( SegmentsTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d4692dbe76785970d75bc7763216ad98f9b7be4
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -0,0 +1,161 @@
+/***************************************************************************
+                          SegmentsTest.hpp -  description
+                             -------------------
+    begin                : Dec 6, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename Segments >
+void test_SetSegmentsSizes_EqualSizes()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
+   segmentsSizes = segmentSize;
+
+   Segments segments( segmentsSizes );
+
+   EXPECT_EQ( segments.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments.getSegmentSize( i ), segmentSize );
+
+   Segments segments2( segments );
+   EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments2.getSize(), segments2.getStorageSize() );
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize );
+
+   Segments segments3;
+   segments3.setSegmentsSizes( segmentsSizes );
+
+   EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments3.getSize(), segments3.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+
+   using SegmentsView = typename Segments::ViewType;
+
+   SegmentsView segmentsView = segments.getView();
+   EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize );
+}
+
+template< typename Segments >
+void test_SetSegmentsSizes_EqualSizes_EllpackOnly()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+
+   Segments segments( segmentsCount, segmentSize );
+
+   EXPECT_EQ( segments.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments.getSegmentSize( i ), segmentSize );
+
+   Segments segments2( segments );
+   EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments2.getSize(), segments2.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize );
+
+   Segments segments3;
+   segments3.setSegmentsSizes( segmentsCount, segmentSize );
+
+   EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segments3.getSize(), segments3.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize );
+
+   using SegmentsView = typename Segments::ViewType;
+
+   SegmentsView segmentsView = segments.getView();
+   EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount );
+   EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize );
+   EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize );
+}
+
+template< typename Segments >
+void test_AllReduction_MaximumInSegments()
+{
+   using DeviceType = typename Segments::DeviceType;
+   using IndexType = typename Segments::IndexType;
+
+   const IndexType segmentsCount = 20;
+   const IndexType segmentSize = 5;
+
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount );
+   segmentsSizes = segmentSize;
+
+   Segments segments( segmentsSizes );
+
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() );
+
+   auto view = v.getView();
+   auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool {
+      view[ globalIdx ] =  segmentIdx * 5 + localIdx + 1;
+      return true;
+   };
+   segments.forAll( init );
+
+   TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
+
+   const auto v_view = v.getConstView();
+   auto result_view = result.getView();
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+      return v_view[ globalIdx ];
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
+      a = TNL::max( a, b );
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType i, const IndexType a ) mutable {
+      result_view[ i ] = a;
+   };
+   segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
+
+   result_view = 0;
+   segments.getView().allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+   for( IndexType i = 0; i < segmentsCount; i++ )
+      EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize );
+}
+
+#endif
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..02edac3325f027e68b867500f714a275517dcbf2
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_CSR.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu
new file mode 100644
index 0000000000000000000000000000000000000000..02edac3325f027e68b867500f714a275517dcbf2
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_CSR.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2a3a186390e6038f263cd195ef600a7516b0a9e
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h
@@ -0,0 +1,56 @@
+/***************************************************************************
+                          SegmentsTest_CSR.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/CSR.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class CSRSegmentsTest : public ::testing::Test
+{
+protected:
+   using CSRSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using CSRSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long   >
+#endif
+>;
+
+TYPED_TEST_SUITE( CSRSegmentsTest, CSRSegmentsTypes );
+
+TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using CSRSegmentsType = typename TestFixture::CSRSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >();
+}
+
+TYPED_TEST( CSRSegmentsTest, allReduction_MaximumInSegments )
+{
+    using CSRSegmentsType = typename TestFixture::CSRSegmentsType;
+
+    test_AllReduction_MaximumInSegments< CSRSegmentsType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..120a25103eb1e48d32ed9d9233f6657ba02699a6
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_Ellpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..120a25103eb1e48d32ed9d9233f6657ba02699a6
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_Ellpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..7def8a7329d0845236f213250dbeceecb5775263
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h
@@ -0,0 +1,63 @@
+/***************************************************************************
+                          SegmentsTest_Ellpack.h -  description
+                             -------------------
+    begin                : Dec 6, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/Ellpack.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class EllpackSegmentsTest : public ::testing::Test
+{
+protected:
+   using EllpackSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using EllpackSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long   >
+#endif
+>;
+
+TYPED_TEST_SUITE( EllpackSegmentsTest, EllpackSegmentsTypes );
+
+TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< EllpackSegmentsType >();
+}
+
+TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >();
+}
+
+TYPED_TEST( EllpackSegmentsTest, allReduction_MaximumInSegments )
+{
+    using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType;
+
+    test_AllReduction_MaximumInSegments< EllpackSegmentsType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd9865f28005d55a1912dcc8f6f7c83844579ec6
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp
@@ -0,0 +1 @@
+#include "SegmentsTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..cd9865f28005d55a1912dcc8f6f7c83844579ec6
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu
@@ -0,0 +1 @@
+#include "SegmentsTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..51131c7dfb0a95df305ca07cf26ef6f7f5350132
--- /dev/null
+++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h
@@ -0,0 +1,56 @@
+/***************************************************************************
+                          SegmentsTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 9, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+#include "SegmentsTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Segments >
+class SlicedEllpackSegmentsTest : public ::testing::Test
+{
+protected:
+   using SlicedEllpackSegmentsType = Segments;
+};
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackSegmentsTypes = ::testing::Types
+<
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long   >
+#ifdef HAVE_CUDA
+   ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int    >,
+    TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long   >
+#endif
+>;
+
+TYPED_TEST_SUITE( SlicedEllpackSegmentsTest, SlicedEllpackSegmentsTypes );
+
+TYPED_TEST( SlicedEllpackSegmentsTest, setSegmentsSizes_EqualSizes )
+{
+    using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType;
+
+    test_SetSegmentsSizes_EqualSizes< SlicedEllpackSegmentsType >();
+}
+
+TYPED_TEST( SlicedEllpackSegmentsTest, allReduction_MaximumInSegments )
+{
+    using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType;
+
+    test_AllReduction_MaximumInSegments< SlicedEllpackSegmentsType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..51d7c4ea9425460ff35991bd44a3b9bf6f7e9a95
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f29db9e968486e16fcf3a9c6f2c8c1e067119344
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..b901acbbd93dd7a7416645e70441d1382bd381a3
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
@@ -0,0 +1,820 @@
+/***************************************************************************
+                          BinaryBinarySparseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Feb 5, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >;
+
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  1             \
+ *    |           1  1  1 |
+ *    |  1  1  1          |
+ *    |     1  1  1  1  1 |
+ *    |  1  1  1  1  1    |
+ *    |  1  1             |
+ *    |  1                |
+ *    |  1                |
+ *    |  1  1  1  1  1    |
+ *    \                 1 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+    const int rows = 10;
+    const int cols = 6;
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 5 );
+    rowLengths.setElement( 0, 2 );
+    rowLengths.setElement( 1,  3 );
+    rowLengths.setElement( 2,  3 );
+    rowLengths.setElement( 5,  2 );
+    rowLengths.setElement( 6,  1 );
+    rowLengths.setElement( 7,  1 );
+    rowLengths.setElement( 9,  1 );
+    m.setCompressedRowLengths( rowLengths );
+
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, 1 );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, 1 );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, 1 );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, 1 );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, 1 );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, 1 );
+
+    m.setElement( 6, 0, 1 );   // 6th row
+
+    m.setElement( 7, 0, 1 );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
+        m.setElement( 8, i, 1 );
+
+    m.setElement( 9, 5, 1 );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              1  1 \
+ *    |           1  1  1 |
+ *    |        1  1  1    |
+ *    |     1  1  1       |
+ *    |  1  1  1          |
+ *    |  1  1             |
+ *    \  1                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+    const int rows = 7;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+    rowLengths.setElement( 0, 4);
+    rowLengths.setElement( 1,  4 );
+    m.setCompressedRowLengths( rowLengths );
+
+    for( int i = 0; i < rows; i++ )
+        for( int j = cols - 1; j > 2; j-- )
+            if( j - i + 1 < cols && j - i + 1 >= 0 )
+                m.setElement( i, j - i + 1, 1 );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  1 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  1             \
+ *    | 1  1  1          |
+ *    |    1  1  1       |
+ *    |       1  1  1    |
+ *    |          1  1  1 |
+ *    |             1  1 |
+ *    \                1 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, 1 );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 1 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testConversion()
+{
+   {
+        SCOPED_TRACE("Tri Diagonal Matrix");
+
+        Matrix1 triDiag1;
+        setupTriDiagMatrix( triDiag1 );
+        checkTriDiagMatrix( triDiag1 );
+
+        Matrix2 triDiag2;
+        triDiag2 = triDiag1;
+        checkTriDiagMatrix( triDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Anti Tri Diagonal Matrix");
+
+        Matrix1 antiTriDiag1;
+        setupAntiTriDiagMatrix( antiTriDiag1 );
+        checkAntiTriDiagMatrix( antiTriDiag1 );
+
+        Matrix2 antiTriDiag2;
+        antiTriDiag2 = antiTriDiag1;
+        checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Uneven Row Size Matrix");
+        Matrix1 unevenRowSize1;
+        setupUnevenRowSizeMatrix( unevenRowSize1 );
+        checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+        Matrix2 unevenRowSize2;
+        unevenRowSize2 = unevenRowSize1;
+        checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+      }
+#endif
+}
+
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( j - i ) )
+            hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   /*std::cerr << "hostMatrix " << hostMatrix << std::endl;
+   std::cerr << "matrix " << matrix << std::endl;
+   std::cerr << "rowCapacities " << rowCapacities << std::endl;*/
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = TNL::min( i + j, 1 );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) );
+      }
+#endif
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_HostToHost )
+{
+   testCopyAssignment< CSR_host, CSR_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, CSR_HostToCuda )
+{
+   testCopyAssignment< CSR_host, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_CudaToHost )
+{
+   testCopyAssignment< CSR_cuda, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_CudaToCuda )
+{
+   testCopyAssignment< CSR_cuda, CSR_cuda >();
+}
+#endif
+
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_HostToHost )
+{
+   testCopyAssignment< E_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, Ellpack_HostToCuda )
+{
+   testCopyAssignment< E_host, E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToHost )
+{
+   testCopyAssignment< E_cuda, E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToCuda )
+{
+   testCopyAssignment< E_cuda, E_cuda >();
+}
+#endif
+
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToHost )
+{
+   testCopyAssignment< SE_host, SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToCuda )
+{
+   testCopyAssignment< SE_host, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToHost )
+{
+   testCopyAssignment< SE_cuda, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
+{
+   testCopyAssignment< SE_cuda, SE_cuda >();
+}
+#endif
+
+////
+// Test of conversion between formats
+TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_host )
+{
+   testConversion< CSR_host, E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_host )
+{
+   testConversion< E_host, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_host )
+{
+   testConversion< CSR_host, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_host )
+{
+   testConversion< SE_host, CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host )
+{
+   testConversion< E_host, SE_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host )
+{
+   testConversion< SE_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_cuda )
+{
+   testConversion< CSR_cuda, E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_cuda )
+{
+   testConversion< E_cuda, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda )
+{
+   testConversion< CSR_cuda, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda )
+{
+   testConversion< SE_cuda, CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda )
+{
+   testConversion< E_cuda, SE_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
+{
+   testConversion< SE_cuda, E_cuda >();
+}
+#endif
+
+////
+// Tridiagonal matrix assignment test
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Multidiagonal matrix assignment test
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host )
+{
+   multidiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host )
+{
+   multidiagonalMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   multidiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda )
+{
+   multidiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   multidiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   multidiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Dense matrix assignment test
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
+{
+   denseMatrixAssignment< CSR_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host )
+{
+   denseMatrixAssignment< E_host >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host )
+{
+   denseMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda )
+{
+   denseMatrixAssignment< CSR_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda )
+{
+   denseMatrixAssignment< E_cuda >();
+}
+
+TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   denseMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..276c432ff349321beb642c177cb3ef6cd282059a
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -0,0 +1,1203 @@
+/***************************************************************************
+                          SparseMatrixTest_impl.h -  description
+                             -------------------
+    begin                : Nov 22, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+#include <sstream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
+
+   Matrix m2( rows, cols );
+   EXPECT_EQ( m2.getRows(), 9 );
+   EXPECT_EQ( m2.getColumns(), 8 );
+}
+
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+
+   IndexType rowLength = 1;
+   for( IndexType i = 2; i < rows; i++ )
+      rowLengths.setElement( i, rowLength++ );
+
+   m.setCompressedRowLengths( rowLengths );
+
+   // Insert values into the rows.
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m.setElement( 0, i, 1 );
+
+   for( IndexType i = 0; i < 3; i++ )      // 1st row
+      m.setElement( 1, i, 1 );
+
+   for( IndexType i = 0; i < 1; i++ )      // 2nd row
+      m.setElement( 2, i, 1 );
+
+   for( IndexType i = 0; i < 2; i++ )      // 3rd row
+      m.setElement( 3, i, 1 );
+
+   for( IndexType i = 0; i < 3; i++ )      // 4th row
+      m.setElement( 4, i, 1 );
+
+   for( IndexType i = 0; i < 4; i++ )      // 5th row
+      m.setElement( 5, i, 1 );
+
+   for( IndexType i = 0; i < 5; i++ )      // 6th row
+      m.setElement( 6, i, 1 );
+
+   for( IndexType i = 0; i < 6; i++ )      // 7th row
+      m.setElement( 7, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )      // 8th row
+      m.setElement( 8, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )      // 9th row
+      m.setElement( 9, i, 1 );
+
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 1 );
+   rowLengths.setElement( 1, 1 );
+   rowLengths.setElement( 2, 1 );
+   rowLengths.setElement( 3, 1 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+      rowLengths.setElement( i, 1 );
+
+   rowLengths.setElement( 8, 1 );
+   rowLengths.setElement( 9, 1 );
+   m.setCompressedRowLengths( rowLengths );
+
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, 1 );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, 1 );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, 1 );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, 1 );
+
+   for( IndexType j = 8; j < rows; j++)
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, 1 );
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+       rowLengths.setElement( i, 1 );
+
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      switch( rowIdx )
+      {
+         case 0:
+           for( IndexType i = 0; i < 4; i++ )
+              row.setElement( i, 2 * i, 1 );
+           break;
+        case 1:
+           for( IndexType i = 0; i < 3; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 2:
+           for( IndexType i = 0; i < 8; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 3:
+           for( IndexType i = 0; i < 2; i++ )
+              row.setElement( i, i, 1 );
+           break;
+        case 4:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 5:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 6:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 7:
+           row.setElement( 0, 0, 1 );
+           break;
+        case 8:
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, 1 );
+            break;
+        case 9:
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, 1 );
+            break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 1 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  1  0  1  0  1  0  0  0  \
+    *    |  1  1  1  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  0  0  |
+    *    |  1  1  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  0  0  0  0  0  0  0  0  0  |
+    *    |  1  1  1  1  1  1  1  1  1  1  |
+    *    \  1  1  1  1  1  1  1  1  1  1 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+       rowLengths.setElement( i, 1 );
+
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   for( IndexType i = 0; i < 4; i++ )
+       m.setElement( 0, 2 * i, 1 );
+
+   for( IndexType i = 0; i < 3; i++ )
+       m.setElement( 1, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )
+       m.setElement( 2, i, 1 );
+
+   for( IndexType i = 0; i < 2; i++ )
+       m.setElement( 3, i, 1 );
+
+   for( IndexType i = 4; i < 8; i++ )
+       m.setElement( i, 0, 1 );
+
+   for( IndexType j = 8; j < rows; j++)
+       for( IndexType i = 0; i < cols; i++ )
+           m.setElement( j, i, 1 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 7, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 5 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 6 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 8 ), 0 );
+   EXPECT_EQ( m.getElement( 7, 9 ), 0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 1 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 1 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 1 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  1  0  1 |
+    *    |  0  1  0  0 |
+    *    \  0  0  1  0 /
+    */
+
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
+
+   Matrix m_1( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1;
+   rowLengths_1.setSize( m_rows_1 );
+   rowLengths_1.setElement( 0, 1 );
+   rowLengths_1.setElement( 1, 2 );
+   rowLengths_1.setElement( 2, 1 );
+   rowLengths_1.setElement( 3, 1 );
+   m_1.setCompressedRowLengths( rowLengths_1 );
+
+   m_1.setElement( 0, 0, 1 );      // 0th row
+
+   m_1.setElement( 1, 1, 1 );      // 1st row
+   m_1.setElement( 1, 3, 1 );
+
+   m_1.setElement( 2, 1, 1 );      // 2nd row
+
+   m_1.setElement( 3, 2, 1 );      // 3rd row
+
+   VectorType inVector_1( m_cols_1 );
+   inVector_1 = 2.0;
+
+   VectorType outVector_1( m_rows_1 );
+   outVector_1 = 0.0;
+
+   m_1.vectorProduct( inVector_1, outVector_1 );
+
+
+   EXPECT_EQ( outVector_1.getElement( 0 ), 2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 4 );
+   EXPECT_EQ( outVector_1.getElement( 2 ), 2 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 2 );
+
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  0  0  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  0  0 /
+    */
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2;
+   rowLengths_2.setSize( m_rows_2 );
+   rowLengths_2.setValue( 3 );
+   rowLengths_2.setElement( 1, 1 );
+   rowLengths_2.setElement( 3, 1 );
+   m_2.setCompressedRowLengths( rowLengths_2 );
+
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_2.setElement( 0, i, 1 );
+
+   m_2.setElement( 1, 3, 1 );           // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )   // 2nd row
+      m_2.setElement( 2, i, 1 );
+
+   for( IndexType i = 1; i < 2; i++ )   // 3rd row
+      m_2.setElement( 3, i, 1 );
+
+   VectorType inVector_2( m_cols_2 );
+   inVector_2 = 2.0;
+
+   VectorType outVector_2( m_rows_2 );
+   outVector_2 = 0.0;
+
+   m_2.vectorProduct( inVector_2, outVector_2 );
+
+   EXPECT_EQ( outVector_2.getElement( 0 ), 6 );
+   EXPECT_EQ( outVector_2.getElement( 1 ), 2 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 6 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 2 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  1  1  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  1  1 /
+    */
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
+
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3;
+   rowLengths_3.setSize( m_rows_3 );
+   rowLengths_3.setValue( 3 );
+   m_3.setCompressedRowLengths( rowLengths_3 );
+
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, 1 );
+
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, 1 );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, 1 );
+
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, 1 );
+
+   VectorType inVector_3( m_cols_3 );
+   inVector_3 = 2.0;
+
+   VectorType outVector_3( m_rows_3 );
+   outVector_3 = 0.0;
+
+   m_3.vectorProduct( inVector_3, outVector_3 );
+
+
+   EXPECT_EQ( outVector_3.getElement( 0 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 6 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 6 );
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  1  1  0  0  1  0  0 \
+    *    |  0  1  1  1  1  0  0  0 |
+    *    |  1  1  1  1  1  0  0  0 |
+    *    |  0  1  1  1  1  0  0  0 |
+    *    |  0  0  1  1  1  1  0  0 |
+    *    |  0  0  0  1  1  1  1  0 |
+    *    |  1  1  1  1  1  0  0  0 |
+    *    \  1  1  1  1  1  0  0  0 /
+    */
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
+
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4;
+   rowLengths_4.setSize( m_rows_4 );
+   rowLengths_4.setValue( 4 );
+   rowLengths_4.setElement( 2, 5 );
+   rowLengths_4.setElement( 6, 5 );
+   rowLengths_4.setElement( 7, 5 );
+   m_4.setCompressedRowLengths( rowLengths_4 );
+
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, 1 );
+
+   m_4.setElement( 0, 5, 1 );
+
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, 1 );
+
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, 1 );
+
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, 1 );
+
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, 1 );
+
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, 1 );
+
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, 1 );
+
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, 1 );
+
+   VectorType inVector_4( m_cols_4 );
+   inVector_4 = 2.0;
+
+   VectorType outVector_4( m_rows_4 );
+   outVector_4 = 0.0;
+
+   m_4.vectorProduct( inVector_4, outVector_4 );
+
+
+   EXPECT_EQ( outVector_4.getElement( 0 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 10 );
+   EXPECT_EQ( outVector_4.getElement( 3 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 4 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 5 ),  8 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 10 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 10 );
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  1  1  0  1  1  0  1 \   6
+    *    |  0  1  0  1  0  0  0  1 |   3
+    *    |  0  1  1  0  1  0  0  1 |   4
+    *    |  0  1  1  1  1  0  0  1 |   5
+    *    |  0  1  0  0  0  0  0  1 |   2
+    *    |  0  1  1  1  1  1  1  1 |   7
+    *    |  1  1  1  1  1  1  1  1 |   8
+    *    \  1  1  1  1  1  1  1  1 /   8
+    */
+
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5;
+   rowLengths_5.setSize( m_rows_5 );
+   rowLengths_5.setElement(0, 6);
+   rowLengths_5.setElement(1, 3);
+   rowLengths_5.setElement(2, 4);
+   rowLengths_5.setElement(3, 5);
+   rowLengths_5.setElement(4, 2);
+   rowLengths_5.setElement(5, 7);
+   rowLengths_5.setElement(6, 8);
+   rowLengths_5.setElement(7, 8);
+   m_5.setCompressedRowLengths( rowLengths_5 );
+
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, 1 );
+
+   m_5.setElement( 0, 4, 1 );           // 0th row
+   m_5.setElement( 0, 5, 1 );
+
+   m_5.setElement( 1, 1, 1 );           // 1st row
+   m_5.setElement( 1, 3, 1 );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, 1 );
+
+   m_5.setElement( 2, 4, 1 );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, 1 );
+
+   m_5.setElement( 4, 1, 1 );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
+
+   VectorType inVector_5( m_cols_5 );
+   inVector_5 = 2.0;
+
+   VectorType outVector_5( m_rows_5 );
+   outVector_5 = 0.0;
+
+   m_5.vectorProduct( inVector_5, outVector_5 );
+
+   EXPECT_EQ( outVector_5.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  6 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 10 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  4 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 14 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 16 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 16 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  1  1  0  1  1  0  1 \   6
+    *    |  0  1  0  1  0  0  0  1 |   3
+    *    |  0  1  1  0  1  0  0  1 |   4
+    *    |  0  1  1  1  1  0  0  1 |   5
+    *    |  0  1  0  0  0  0  0  1 |   2
+    *    |  0  1  1  1  1  1  1  1 |   7
+    *    |  1  1  1  1  1  1  1  1 |   8
+    *    \  1  1  1  1  1  1  1  1 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
+   //rowLengths.setSize( rows );
+   rowsCapacities.setElement(0, 6);
+   rowsCapacities.setElement(1, 3);
+   rowsCapacities.setElement(2, 4);
+   rowsCapacities.setElement(3, 5);
+   rowsCapacities.setElement(4, 2);
+   rowsCapacities.setElement(5, 7);
+   rowsCapacities.setElement(6, 8);
+   rowsCapacities.setElement(7, 8);
+   m.setCompressedRowLengths( rowsCapacities );
+
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, 1 );
+
+   m.setElement( 0, 4, 1 );             // 0th row
+   m.setElement( 0, 5, 1 );
+
+   m.setElement( 1, 1, 1 );             // 1st row
+   m.setElement( 1, 3, 1 );
+
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, 1 );
+
+   m.setElement( 2, 4, 1 );             // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, 1 );
+
+   m.setElement( 4, 1, 1 );             // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, 1 );
+
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+       m.setElement( 7, i, 1 );
+
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 8 ) ; // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  0  0 \
+    *    |  1  1  1  0 |
+    *    |  0  1  1  1 |
+    *    \  0  0  1  1 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( m_rows );
+   rowLengths.setValue( 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
+
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
+
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
+
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
+
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+   IndexType row = 0;
+   RealType omega = 1;
+
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  1  0  1 |
+    *    |  1  1  1  0 |
+    *    \  0  1  1  1 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   savedMatrix.setCompressedRowLengths( rowLengths );
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+       savedMatrix.setElement( 0, i, 1 );
+
+   savedMatrix.setElement( 1, 1, 1 );
+   savedMatrix.setElement( 1, 3, 1 );            // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+       savedMatrix.setElement( 2, i, 1 );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+       savedMatrix.setElement( 3, i, 1 );
+
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+   Matrix loadedMatrix;
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  1 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  1 );
+
+   EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  1  1  0 \
+    *    |  0  0  0  1 |
+    *    |  1  1  1  0 |
+    *    |  0  1  1  1 |
+    *    \  0  0  1  1 /
+    */
+
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+       m.setElement( 0, i, 1 );
+
+   m.setElement( 1, 3, 1 );      // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+       m.setElement( 2, i, 1 );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+       m.setElement( 3, i, 1 );
+
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+       m.setElement( 4, i, 1 );
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+
+   couted << "Row: 0 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+              "Row: 1 ->  Col:3->1\t\n"
+              "Row: 2 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
+              "Row: 3 ->  Col:1->1	 Col:2->1	 Col:3->1\t\n"
+              "Row: 4 ->  Col:2->1	 Col:3->1\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1f4891053e1f1ad51873d3e6f8797d8477914b2
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_CSR.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_CSR.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu
new file mode 100644
index 0000000000000000000000000000000000000000..496bdde1b301ab06cd73221b17acb27ba7a80b34
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_CSR.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_CSR.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..a853281bef7b1bb9a3cb4985b6a3a53ba519ee45
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -0,0 +1,127 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_CSR.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class BinaryMatrixTest_CSR : public ::testing::Test
+{
+protected:
+   using CSRMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using CSRMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >
+#endif
+>;
+
+TYPED_TEST_SUITE( BinaryMatrixTest_CSR, CSRMatrixTypes);
+
+TYPED_TEST( BinaryMatrixTest_CSR, setDimensionsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetDimensions< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, setCompressedRowLengthsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetCompressedRowLengths< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, setLikeTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetLike< CSRMatrixType, CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, resetTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Reset< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, getRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_GetRow< CSRMatrixType >();
+}
+
+
+TYPED_TEST( BinaryMatrixTest_CSR, setElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetElement< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, vectorProductTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_VectorProduct< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, rowsReduction )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_RowsReduction< CSRMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" );
+}
+
+TYPED_TEST( BinaryMatrixTest_CSR, printTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Print< CSRMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1d5d71cf681fda25c0b0dde127dcc20c795a039
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_Ellpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8d075f1cf4539b741be6e1693abe8ddd28766f74
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_Ellpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..3c0a65cfda5dbc98edc099fcb39a6334afebd0f3
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
@@ -0,0 +1,138 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_Ellpack.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class BinaryMatrixTest_Ellpack : public ::testing::Test
+{
+protected:
+   using EllpackMatrixType = Matrix;
+};
+
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAlocator >
+using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >;
+
+// types for which MatrixTest is instantiated
+using EllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >
+#endif
+>;
+
+TYPED_TEST_SUITE( BinaryMatrixTest_Ellpack, EllpackMatrixTypes);
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setDimensionsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetDimensions< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setCompressedRowLengthsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetCompressedRowLengths< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setLikeTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetLike< EllpackMatrixType, EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, resetTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Reset< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, getRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_GetRow< EllpackMatrixType >();
+}
+
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, setElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, vectorProductTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_VectorProduct< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, rowsReduction )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_RowsReduction< EllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" );
+}
+
+TYPED_TEST( BinaryMatrixTest_Ellpack, printTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Print< EllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7046d815629444d3275eaae4b44d429f0ea8c3f7
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.cpp -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_SlicedEllpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bb6829310fc45f5679bf658a9a792b72dbb0b6b9
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.cu -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "BinarySparseMatrixTest_SlicedEllpack.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..98c5f65ae63fcec30b91d1cbc2efd67e5586e32a
--- /dev/null
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
@@ -0,0 +1,138 @@
+/***************************************************************************
+                          BinarySparseMatrixTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Jan 30, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+
+#include "BinarySparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class BinaryMatrixTest_SlicedEllpack : public ::testing::Test
+{
+protected:
+   using SlicedEllpackMatrixType = Matrix;
+};
+
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAllocator >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >;
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >
+#endif
+>;
+
+TYPED_TEST_SUITE( BinaryMatrixTest_SlicedEllpack, SlicedEllpackMatrixTypes);
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setDimensionsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetDimensions< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setCompressedRowLengthsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setLikeTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, resetTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Reset< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, getRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_GetRow< SlicedEllpackMatrixType >();
+}
+
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, vectorProductTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_VectorProduct< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, rowsReduction )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_RowsReduction< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" );
+}
+
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Print< SlicedEllpackMatrixType >();
+}
+
+#endif
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 2a08be2198e1dcbff5de4ccacccae38e2f52f17b..1c536a98210b59789d2a7b34a9b9935150a7e0ac 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -1,18 +1,17 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+ADD_SUBDIRECTORY( Legacy )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
@@ -23,28 +22,40 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
-ELSE(  BUILD_CUDA )
-   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+ELSE(  BUILD_CUDA )
+   ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( DenseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp )
-   TARGET_COMPILE_OPTIONS( SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
@@ -58,24 +69,45 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+   ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( SymmetricSparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
 ENDIF( BUILD_CUDA )
 
+ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
-ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-# TODO: Uncomment the following when AdEllpack works
-#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
-# TODO: DenseMatrixTest is not finished
-#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-
+ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
 
 if( ${BUILD_MPI} )
    if( BUILD_CUDA )
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5cdd413af60773cddf070493509b0be861c97018
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cu b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5fbd77efa119a0bde37f52d0aeecbccb1581262b
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "DenseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..3ef31f1075beb311374ad0a45e4a4aff7d2641eb
--- /dev/null
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
@@ -0,0 +1,657 @@
+/***************************************************************************
+                          DenseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Jan 19, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using Dense_host               = TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >;
+using Dense_host_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >;
+using Dense_cuda               = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >;
+using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >;
+
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  2             \
+ *    |           3  4  5 |
+ *    |  6  7  8          |
+ *    |     9 10 11 12 13 |
+ *    | 14 15 16 17 18    |
+ *    | 19 20             |
+ *    | 21                |
+ *    | 22                |
+ *    | 23 24 25 26 27    |
+ *    \                28 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+   const int rows = 10;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 5 );
+   rowLengths.setElement( 0, 2 );
+   rowLengths.setElement( 1,  3 );
+   rowLengths.setElement( 2,  3 );
+   rowLengths.setElement( 5,  2 );
+   rowLengths.setElement( 6,  1 );
+   rowLengths.setElement( 7,  1 );
+   rowLengths.setElement( 9,  1 );
+   m.setCompressedRowLengths( rowLengths );
+
+    int value = 1;
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, value++ );
+
+    m.setElement( 6, 0, value++ );   // 6th row
+
+    m.setElement( 7, 0, value++ );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
+        m.setElement( 8, i, value++ );
+
+    m.setElement( 9, 5, value++ );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  5 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 20 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 28 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              2  1 \
+ *    |           5  4  3 |
+ *    |        8  7  6    |
+ *    |    11 10  9       |
+ *    | 14 13 12          |
+ *    | 16 15             |
+ *    \ 17                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0, 4);
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = cols - 1; j > 2; j-- )
+         if( j - i + 1 < cols && j - i + 1 >= 0 )
+            m.setElement( i, j - i + 1, value++ );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  3 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 17 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  2             \
+ *    | 3  4  5          |
+ *    |    6  7  8       |
+ *    |       9 10 11    |
+ *    |         12 13 14 |
+ *    |            15 16 |
+ *    \               17 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, value++ );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 14 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 16 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 17 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( j - i ) )
+            hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+TEST( DenseMatrixCopyTest, Dense_HostToDense_Host )
+{
+   testCopyAssignment< Dense_host,               Dense_host >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_host >();
+   testCopyAssignment< Dense_host,               Dense_host_RowMajorOrder >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, Dense_HostToDense_Cuda )
+{
+   testCopyAssignment< Dense_host,               Dense_cuda >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda >();
+   testCopyAssignment< Dense_host,               Dense_cuda_RowMajorOrder >();
+   testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, Dense_CudaToDense_Host )
+{
+   testCopyAssignment< Dense_cuda,               Dense_host >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host >();
+   testCopyAssignment< Dense_cuda,               Dense_host_RowMajorOrder >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, Dense_CudaToDense_Cuda )
+{
+   testCopyAssignment< Dense_cuda,               Dense_cuda >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda >();
+   testCopyAssignment< Dense_cuda,               Dense_cuda_RowMajorOrder >();
+   testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+
+TEST( DenseMatrixCopyTest, CSR_HostToDense_Host )
+{
+   testCopyAssignment< CSR_host, Dense_host >();
+   testCopyAssignment< CSR_host, Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, CSR_HostToDense_cuda )
+{
+   testCopyAssignment< CSR_host, Dense_cuda >();
+   testCopyAssignment< CSR_host, Dense_cuda_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, CSR_CudaToDense_host )
+{
+   testCopyAssignment< CSR_cuda, Dense_host >();
+   testCopyAssignment< CSR_cuda, Dense_host_RowMajorOrder >();
+}
+
+TEST( DenseMatrixCopyTest, CSR_CudaToDense_cuda )
+{
+   testCopyAssignment< CSR_cuda, Dense_cuda >();
+   testCopyAssignment< CSR_cuda, Dense_cuda_RowMajorOrder >();
+}
+#endif
+
+////
+// Tridiagonal matrix assignment test
+TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_host )
+{
+   tridiagonalMatrixAssignment< Dense_host >();
+   tridiagonalMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_cuda )
+{
+   tridiagonalMatrixAssignment< Dense_cuda >();
+   tridiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+////
+// Multidiagonal matrix assignment test
+TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_host )
+{
+   multidiagonalMatrixAssignment< Dense_host >();
+   multidiagonalMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_cuda )
+{
+   multidiagonalMatrixAssignment< Dense_cuda >();
+   multidiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+////
+// Dense matrix assignment test
+TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_host )
+{
+   denseMatrixAssignment< Dense_host >();
+   denseMatrixAssignment< Dense_host_RowMajorOrder >();
+}
+
+#ifdef HAVE_CUDA
+TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_cuda )
+{
+   denseMatrixAssignment< Dense_cuda >();
+   denseMatrixAssignment< Dense_cuda_RowMajorOrder >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 8d9e9c727a0d88c40b90f0623d8b2ec8808e3f95..37ae58bf1a1e7e8b03220c4916ba79cf48729ef9 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -26,29 +26,21 @@ using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >;
 
 static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <type_traits>
 
 #include <gtest/gtest.h>
 
-template< typename MatrixHostFloat, typename MatrixHostInt >
-void host_test_GetType()
+void test_GetSerializationType()
 {
-    MatrixHostFloat mtrxHostFloat;
-    MatrixHostInt mtrxHostInt;
-    
-    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) );
-    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
-}
-
-template< typename MatrixCudaFloat, typename MatrixCudaInt >
-void cuda_test_GetType()
-{
-    MatrixCudaFloat mtrxCudaFloat;
-    MatrixCudaInt mtrxCudaInt;
-
-    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
-    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Dense< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) );
 }
 
 template< typename Matrix >
@@ -57,13 +49,13 @@ void test_SetDimensions()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 9;
     const IndexType cols = 8;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRows(), 9 );
     EXPECT_EQ( m.getColumns(), 8 );
 }
@@ -74,38 +66,90 @@ void test_SetLike()
     using RealType = typename Matrix1::RealType;
     using DeviceType = typename Matrix1::DeviceType;
     using IndexType = typename Matrix1::IndexType;
-    
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix1 m1;
     m1.reset();
     m1.setDimensions( rows + 1, cols + 2 );
-    
+
     Matrix2 m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     m1.setLike( m2 );
-    
+
     EXPECT_EQ( m1.getRows(), m2.getRows() );
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+    Matrix m( rows, cols );
+
+    // Insert values into the rows.
+    RealType value = 1;
+
+    for( IndexType i = 0; i < 3; i++ )      // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 1; i++ )      // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )      // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( IndexType i = 0; i < 4; i++ )      // 5th row
+        m.setElement( 5, i, value++ );
+
+    for( IndexType i = 0; i < 5; i++ )      // 6th row
+        m.setElement( 6, i, value++ );
+
+    for( IndexType i = 0; i < 6; i++ )      // 7th row
+        m.setElement( 7, i, value++ );
+
+    for( IndexType i = 0; i < 7; i++ )      // 8th row
+        m.setElement( 8, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )      // 9th row
+        m.setElement( 9, i, value++ );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
 template< typename Matrix >
 void test_GetRowLength()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRowLength( 0 ), 7 );
     EXPECT_EQ( m.getRowLength( 1 ), 7 );
     EXPECT_EQ( m.getRowLength( 2 ), 7 );
@@ -122,14 +166,14 @@ void test_GetNumberOfMatrixElements()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getNumberOfMatrixElements(), 42 );
 }
 
@@ -139,7 +183,7 @@ void test_GetNumberOfNonzeroMatrixElements()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 7x6 dense matrix:
  *
@@ -153,19 +197,19 @@ void test_GetNumberOfNonzeroMatrixElements()
  */
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0.
     m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0.
-    
+
     EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 );
 }
 
@@ -175,7 +219,7 @@ void test_Reset()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -187,12 +231,12 @@ void test_Reset()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     m.reset();
-    
+
     EXPECT_EQ( m.getRows(), 0 );
     EXPECT_EQ( m.getColumns(), 0 );
 }
@@ -202,7 +246,7 @@ void test_SetValue()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;  
+    using IndexType = typename Matrix::IndexType;
 /*
  * Sets up the following 7x6 dense matrix:
  *
@@ -216,110 +260,110 @@ void test_SetValue()
  */
     const IndexType rows = 7;
     const IndexType cols = 6;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
     EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  7 );
     EXPECT_EQ( m.getElement( 1, 1 ),  8 );
     EXPECT_EQ( m.getElement( 1, 2 ),  9 );
     EXPECT_EQ( m.getElement( 1, 3 ), 10 );
     EXPECT_EQ( m.getElement( 1, 4 ), 11 );
     EXPECT_EQ( m.getElement( 1, 5 ), 12 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 13 );
     EXPECT_EQ( m.getElement( 2, 1 ), 14 );
     EXPECT_EQ( m.getElement( 2, 2 ), 15 );
     EXPECT_EQ( m.getElement( 2, 3 ), 16 );
     EXPECT_EQ( m.getElement( 2, 4 ), 17 );
     EXPECT_EQ( m.getElement( 2, 5 ), 18 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 19 );
     EXPECT_EQ( m.getElement( 3, 1 ), 20 );
     EXPECT_EQ( m.getElement( 3, 2 ), 21 );
     EXPECT_EQ( m.getElement( 3, 3 ), 22 );
     EXPECT_EQ( m.getElement( 3, 4 ), 23 );
     EXPECT_EQ( m.getElement( 3, 5 ), 24 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 25 );
     EXPECT_EQ( m.getElement( 4, 1 ), 26 );
     EXPECT_EQ( m.getElement( 4, 2 ), 27 );
     EXPECT_EQ( m.getElement( 4, 3 ), 28 );
     EXPECT_EQ( m.getElement( 4, 4 ), 29 );
     EXPECT_EQ( m.getElement( 4, 5 ), 30 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 31 );
     EXPECT_EQ( m.getElement( 5, 1 ), 32 );
     EXPECT_EQ( m.getElement( 5, 2 ), 33 );
     EXPECT_EQ( m.getElement( 5, 3 ), 34 );
     EXPECT_EQ( m.getElement( 5, 4 ), 35 );
     EXPECT_EQ( m.getElement( 5, 5 ), 36 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 37 );
     EXPECT_EQ( m.getElement( 6, 1 ), 38 );
     EXPECT_EQ( m.getElement( 6, 2 ), 39 );
     EXPECT_EQ( m.getElement( 6, 3 ), 40 );
     EXPECT_EQ( m.getElement( 6, 4 ), 41 );
     EXPECT_EQ( m.getElement( 6, 5 ), 42 );
-    
+
     // Set the values of all elements to a certain number
     m.setValue( 42 );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ), 42 );
     EXPECT_EQ( m.getElement( 0, 1 ), 42 );
     EXPECT_EQ( m.getElement( 0, 2 ), 42 );
     EXPECT_EQ( m.getElement( 0, 3 ), 42 );
     EXPECT_EQ( m.getElement( 0, 4 ), 42 );
     EXPECT_EQ( m.getElement( 0, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 42 );
     EXPECT_EQ( m.getElement( 1, 1 ), 42 );
     EXPECT_EQ( m.getElement( 1, 2 ), 42 );
     EXPECT_EQ( m.getElement( 1, 3 ), 42 );
     EXPECT_EQ( m.getElement( 1, 4 ), 42 );
     EXPECT_EQ( m.getElement( 1, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 42 );
     EXPECT_EQ( m.getElement( 2, 1 ), 42 );
     EXPECT_EQ( m.getElement( 2, 2 ), 42 );
     EXPECT_EQ( m.getElement( 2, 3 ), 42 );
     EXPECT_EQ( m.getElement( 2, 4 ), 42 );
     EXPECT_EQ( m.getElement( 2, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 42 );
     EXPECT_EQ( m.getElement( 3, 1 ), 42 );
     EXPECT_EQ( m.getElement( 3, 2 ), 42 );
     EXPECT_EQ( m.getElement( 3, 3 ), 42 );
     EXPECT_EQ( m.getElement( 3, 4 ), 42 );
     EXPECT_EQ( m.getElement( 3, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 42 );
     EXPECT_EQ( m.getElement( 4, 1 ), 42 );
     EXPECT_EQ( m.getElement( 4, 2 ), 42 );
     EXPECT_EQ( m.getElement( 4, 3 ), 42 );
     EXPECT_EQ( m.getElement( 4, 4 ), 42 );
     EXPECT_EQ( m.getElement( 4, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 42 );
     EXPECT_EQ( m.getElement( 5, 1 ), 42 );
     EXPECT_EQ( m.getElement( 5, 2 ), 42 );
     EXPECT_EQ( m.getElement( 5, 3 ), 42 );
     EXPECT_EQ( m.getElement( 5, 4 ), 42 );
     EXPECT_EQ( m.getElement( 5, 5 ), 42 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 42 );
     EXPECT_EQ( m.getElement( 6, 1 ), 42 );
     EXPECT_EQ( m.getElement( 6, 2 ), 42 );
@@ -345,40 +389,40 @@ void test_SetElement()
  */
     const IndexType rows = 5;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
-    m.setDimensions( rows, cols );    
-    
+    m.setDimensions( rows, cols );
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  6 );
     EXPECT_EQ( m.getElement( 1, 1 ),  7 );
     EXPECT_EQ( m.getElement( 1, 2 ),  8 );
     EXPECT_EQ( m.getElement( 1, 3 ),  9 );
     EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 11 );
     EXPECT_EQ( m.getElement( 2, 1 ), 12 );
     EXPECT_EQ( m.getElement( 2, 2 ), 13 );
     EXPECT_EQ( m.getElement( 2, 3 ), 14 );
     EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ), 18 );
     EXPECT_EQ( m.getElement( 3, 3 ), 19 );
     EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 21 );
     EXPECT_EQ( m.getElement( 4, 1 ), 22 );
     EXPECT_EQ( m.getElement( 4, 2 ), 23 );
@@ -404,53 +448,53 @@ void test_AddElement()
  */
     const IndexType rows = 6;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             m.setElement( i, j, value++ );
-    
+
     // Check the added elements
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  4 );
     EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  6 );
     EXPECT_EQ( m.getElement( 1, 1 ),  7 );
     EXPECT_EQ( m.getElement( 1, 2 ),  8 );
     EXPECT_EQ( m.getElement( 1, 3 ),  9 );
     EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 11 );
     EXPECT_EQ( m.getElement( 2, 1 ), 12 );
     EXPECT_EQ( m.getElement( 2, 2 ), 13 );
     EXPECT_EQ( m.getElement( 2, 3 ), 14 );
     EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ), 18 );
     EXPECT_EQ( m.getElement( 3, 3 ), 19 );
     EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 21 );
     EXPECT_EQ( m.getElement( 4, 1 ), 22 );
     EXPECT_EQ( m.getElement( 4, 2 ), 23 );
     EXPECT_EQ( m.getElement( 4, 3 ), 24 );
     EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 26 );
     EXPECT_EQ( m.getElement( 5, 1 ), 27 );
     EXPECT_EQ( m.getElement( 5, 2 ), 28 );
     EXPECT_EQ( m.getElement( 5, 3 ), 29 );
     EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-    
+
     // Add new elements to the old elements with a multiplying factor applied to the old elements.
 /*
  * The following setup results in the following 6x5 dense matrix:
@@ -466,38 +510,38 @@ void test_AddElement()
     RealType multiplicator = 2;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
-            m.addElement( i, j, newValue++, multiplicator );    
-    
+            m.addElement( i, j, newValue++, multiplicator );
+
     EXPECT_EQ( m.getElement( 0, 0 ),  3 );
     EXPECT_EQ( m.getElement( 0, 1 ),  6 );
     EXPECT_EQ( m.getElement( 0, 2 ),  9 );
     EXPECT_EQ( m.getElement( 0, 3 ), 12 );
     EXPECT_EQ( m.getElement( 0, 4 ), 15 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 18 );
     EXPECT_EQ( m.getElement( 1, 1 ), 21 );
     EXPECT_EQ( m.getElement( 1, 2 ), 24 );
     EXPECT_EQ( m.getElement( 1, 3 ), 27 );
     EXPECT_EQ( m.getElement( 1, 4 ), 30 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ), 33 );
     EXPECT_EQ( m.getElement( 2, 1 ), 36 );
     EXPECT_EQ( m.getElement( 2, 2 ), 39 );
     EXPECT_EQ( m.getElement( 2, 3 ), 42 );
     EXPECT_EQ( m.getElement( 2, 4 ), 45 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 48 );
     EXPECT_EQ( m.getElement( 3, 1 ), 51 );
     EXPECT_EQ( m.getElement( 3, 2 ), 54 );
     EXPECT_EQ( m.getElement( 3, 3 ), 57 );
     EXPECT_EQ( m.getElement( 3, 4 ), 60 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 63 );
     EXPECT_EQ( m.getElement( 4, 1 ), 66 );
     EXPECT_EQ( m.getElement( 4, 2 ), 69 );
     EXPECT_EQ( m.getElement( 4, 3 ), 72 );
     EXPECT_EQ( m.getElement( 4, 4 ), 75 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 78 );
     EXPECT_EQ( m.getElement( 5, 1 ), 81 );
     EXPECT_EQ( m.getElement( 5, 2 ), 84 );
@@ -508,189 +552,195 @@ void test_AddElement()
 template< typename Matrix >
 void test_SetRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 3x7 dense matrix:
- *
- *    /  1  2  3  4  5  6  7 \
- *    |  8  9 10 11 12 13 14 |
- *    \ 15 16 17 18 19 20 21 /
- */
-    const IndexType rows = 3;
-    const IndexType cols = 7;
-    
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );       
-    
-    RealType row1 [ 5 ] = { 11, 11, 11, 11, 11 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row2 [ 5 ] = { 22, 22, 22, 22, 22 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row3 [ 5 ] = { 33, 33, 33, 33, 33 }; IndexType colIndexes3 [ 5 ] = { 2, 3, 4, 5, 6 };
-    
-    IndexType row = 0;
-    IndexType elements = 5;
-    
-    m.setRow( row++, colIndexes1, row1, elements );
-    m.setRow( row++, colIndexes2, row2, elements );
-    m.setRow( row++, colIndexes3, row3, elements );
-    
-    EXPECT_EQ( m.getElement( 0, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 1 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 2 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 3 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 4 ), 11 );
-    EXPECT_EQ( m.getElement( 0, 5 ),  6 );
-    EXPECT_EQ( m.getElement( 0, 6 ),  7 );
-    
-    EXPECT_EQ( m.getElement( 1, 0 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 2 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 3 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 22 );
-    EXPECT_EQ( m.getElement( 1, 5 ), 13 );
-    EXPECT_EQ( m.getElement( 1, 6 ), 14 );
-    
-    EXPECT_EQ( m.getElement( 2, 0 ), 15 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 16 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 5 ), 33 );
-    EXPECT_EQ( m.getElement( 2, 6 ), 33 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 3x7 dense matrix:
+    *
+    *    / 11 11 11 11 11  6  7 \
+    *    | 22 22 22 22 22 13 14 |
+    *    \ 15 16 33 33 33 33 33 /
+    */
+   const IndexType rows = 3;
+   const IndexType cols = 7;
+
+   Matrix m;
+   m.reset();
+   m.setDimensions( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 3 ][ 5 ] {
+         { 11, 11, 11, 11, 11 },
+         { 22, 22, 22, 22, 22 },
+         { 33, 33, 33, 33, 33 } };
+      IndexType columnIndexes[ 3 ][ 5 ] {
+         { 0, 1, 2, 3, 4 },
+         { 0, 1, 2, 3, 4 },
+         { 2, 3, 4, 5, 6 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  7 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 22 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 14 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 33 );
 }
 
 template< typename Matrix >
 void test_AddRow()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 6x5 dense matrix:
- *
- *    /  1  2  3  4  5 \
- *    |  6  7  8  9 10 |
- *    | 11 12 13 14 15 |
- *    | 16 17 18 19 20 |
- *    | 21 22 23 24 25 |
- *    \ 26 27 28 29 30 /
- */
-    const IndexType rows = 6;
-    const IndexType cols = 5;
-    
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
-    
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            m.setElement( i, j, value++ );
-    
-    // Check the added elements
-    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( m.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( m.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( m.getElement( 0, 3 ),  4 );
-    EXPECT_EQ( m.getElement( 0, 4 ),  5 );
-    
-    EXPECT_EQ( m.getElement( 1, 0 ),  6 );
-    EXPECT_EQ( m.getElement( 1, 1 ),  7 );
-    EXPECT_EQ( m.getElement( 1, 2 ),  8 );
-    EXPECT_EQ( m.getElement( 1, 3 ),  9 );
-    EXPECT_EQ( m.getElement( 1, 4 ), 10 );
-    
-    EXPECT_EQ( m.getElement( 2, 0 ), 11 );
-    EXPECT_EQ( m.getElement( 2, 1 ), 12 );
-    EXPECT_EQ( m.getElement( 2, 2 ), 13 );
-    EXPECT_EQ( m.getElement( 2, 3 ), 14 );
-    EXPECT_EQ( m.getElement( 2, 4 ), 15 );
-    
-    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
-    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
-    EXPECT_EQ( m.getElement( 3, 2 ), 18 );
-    EXPECT_EQ( m.getElement( 3, 3 ), 19 );
-    EXPECT_EQ( m.getElement( 3, 4 ), 20 );
-    
-    EXPECT_EQ( m.getElement( 4, 0 ), 21 );
-    EXPECT_EQ( m.getElement( 4, 1 ), 22 );
-    EXPECT_EQ( m.getElement( 4, 2 ), 23 );
-    EXPECT_EQ( m.getElement( 4, 3 ), 24 );
-    EXPECT_EQ( m.getElement( 4, 4 ), 25 );
-    
-    EXPECT_EQ( m.getElement( 5, 0 ), 26 );
-    EXPECT_EQ( m.getElement( 5, 1 ), 27 );
-    EXPECT_EQ( m.getElement( 5, 2 ), 28 );
-    EXPECT_EQ( m.getElement( 5, 3 ), 29 );
-    EXPECT_EQ( m.getElement( 5, 4 ), 30 );
-    
-    // Add new elements to the old elements with a multiplying factor applied to the old elements.
-/*
- * The following setup results in the following 6x5 sparse matrix:
- *
- *    /  3  6  9 12 15 \
- *    | 18 21 24 27 30 |
- *    | 33 36 39 42 45 |
- *    | 48 51 54 57 60 |
- *    | 63 66 69 72 75 |
- *    \ 78 81 84 87 90 /
- */
-    
-    RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row3 [ 5 ] = { 44, 44, 44, 44, 0 }; IndexType colIndexes3 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row4 [ 5 ] = { 55, 55, 55, 55, 0 }; IndexType colIndexes4 [ 5 ] = { 0, 1, 2, 3, 4 };
-    RealType row5 [ 5 ] = { 66, 66, 66, 66, 0 }; IndexType colIndexes5 [ 5 ] = { 0, 1, 2, 3, 4 };
-    
-    IndexType row = 0;
-    IndexType elements = 5;
-    RealType thisRowMultiplicator = 0;
-    
-    m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ );
-    m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );
-    
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 dense matrix:
+    *
+    *    /  1  2  3  4  5 \
+    *    |  6  7  8  9 10 |
+    *    | 11 12 13 14 15 |
+    *    | 16 17 18 19 20 |
+    *    | 21 22 23 24 25 |
+    *    \ 26 27 28 29 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         m.setElement( i, j, value++ );
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 10 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 23 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 26 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 27 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 28 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 29 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9 12 15 \
+    *    | 18 21 24 27 30 |
+    *    | 33 36 39 42 45 |
+    *    | 48 51 54 57 60 |
+    *    | 63 66 69 72 75 |
+    *    \ 78 81 84 87 90 /
+    */
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 5 ] {
+         { 11, 11, 11, 11, 0 },
+         { 22, 22, 22, 22, 0 },
+         { 33, 33, 33, 33, 0 },
+         { 44, 44, 44, 44, 0 },
+         { 55, 55, 55, 55, 0 },
+         { 66, 66, 66, 66, 0 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 5; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+
     EXPECT_EQ( m.getElement( 0, 0 ),  11 );
     EXPECT_EQ( m.getElement( 0, 1 ),  11 );
     EXPECT_EQ( m.getElement( 0, 2 ),  11 );
     EXPECT_EQ( m.getElement( 0, 3 ),  11 );
     EXPECT_EQ( m.getElement( 0, 4 ),   0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  28 );
     EXPECT_EQ( m.getElement( 1, 1 ),  29 );
     EXPECT_EQ( m.getElement( 1, 2 ),  30 );
     EXPECT_EQ( m.getElement( 1, 3 ),  31 );
     EXPECT_EQ( m.getElement( 1, 4 ),  10 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  55 );
     EXPECT_EQ( m.getElement( 2, 1 ),  57 );
     EXPECT_EQ( m.getElement( 2, 2 ),  59 );
     EXPECT_EQ( m.getElement( 2, 3 ),  61 );
     EXPECT_EQ( m.getElement( 2, 4 ),  30 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ),  92 );
     EXPECT_EQ( m.getElement( 3, 1 ),  95 );
     EXPECT_EQ( m.getElement( 3, 2 ),  98 );
     EXPECT_EQ( m.getElement( 3, 3 ), 101 );
     EXPECT_EQ( m.getElement( 3, 4 ),  60 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 139 );
     EXPECT_EQ( m.getElement( 4, 1 ), 143 );
     EXPECT_EQ( m.getElement( 4, 2 ), 147 );
     EXPECT_EQ( m.getElement( 4, 3 ), 151 );
     EXPECT_EQ( m.getElement( 4, 4 ), 100 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 196 );
     EXPECT_EQ( m.getElement( 5, 1 ), 201 );
     EXPECT_EQ( m.getElement( 5, 2 ), 206 );
@@ -715,31 +765,31 @@ void test_VectorProduct()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
 
     using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-    
+
     VectorType inVector;
     inVector.setSize( 4 );
-    for( IndexType i = 0; i < inVector.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector.getSize(); i++ )
         inVector.setElement( i, 2 );
 
-    VectorType outVector;  
+    VectorType outVector;
     outVector.setSize( 5 );
     for( IndexType j = 0; j < outVector.getSize(); j++ )
         outVector.setElement( j, 0 );
- 
-    
+
+
     m.vectorProduct( inVector, outVector);
-   
+
     EXPECT_EQ( outVector.getElement( 0 ),  20 );
     EXPECT_EQ( outVector.getElement( 1 ),  52 );
     EXPECT_EQ( outVector.getElement( 2 ),  84 );
@@ -764,16 +814,16 @@ void test_AddMatrix()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
-    
+
 /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -783,16 +833,16 @@ void test_AddMatrix()
  *    | 13 14 15 16 |
  *    \ 17 18 19 20 /
  */
-    
+
     Matrix m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++)
             m2.setElement( i, j, newValue++ );
-    
+
     /*
  * Sets up the following 5x4 dense matrix:
  *
@@ -802,63 +852,63 @@ void test_AddMatrix()
  *    | 13 14 15 16 |
  *    \ 17 18 19 20 /
  */
-    
+
     Matrix mResult;
     mResult.reset();
     mResult.setDimensions( rows, cols );
-    
+
     mResult = m;
-    
+
     RealType matrixMultiplicator = 2;
     RealType thisMatrixMultiplicator = 1;
-    
+
     mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
     EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
     EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
     EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
     EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
     EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
     EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
     EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
     EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
     EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
     EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
     EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
     EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
     EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
     EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
     EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
     EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
     EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
     EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
     EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
     EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
     EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
     EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
     EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
     EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
     EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
     EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
     EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
     EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
     EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
@@ -882,16 +932,16 @@ void test_GetMatrixProduct()
  */
     const IndexType leftRows = 5;
     const IndexType leftCols = 4;
-    
+
     Matrix leftMatrix;
     leftMatrix.reset();
     leftMatrix.setDimensions( leftRows, leftCols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < leftRows; i++ )
         for( IndexType j = 0; j < leftCols; j++)
             leftMatrix.setElement( i, j, value++ );
-    
+
 /*
  * Sets up the following 4x5 dense matrix:
  *
@@ -902,16 +952,16 @@ void test_GetMatrixProduct()
  */
     const IndexType rightRows = 4;
     const IndexType rightCols = 5;
-    
+
     Matrix rightMatrix;
     rightMatrix.reset();
     rightMatrix.setDimensions( rightRows, rightCols );
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < rightRows; i++ )
         for( IndexType j = 0; j < rightCols; j++)
             rightMatrix.setElement( i, j, newValue++ );
-    
+
 /*
  * Sets up the following 5x5 resulting dense matrix:
  *
@@ -921,48 +971,48 @@ void test_GetMatrixProduct()
  *    |  0  0  0  0 |
  *    \  0  0  0  0 /
  */
-    
+
     Matrix mResult;
     mResult.reset();
     mResult.setDimensions( leftRows, rightCols );
     mResult.setValue( 0 );
-    
+
     RealType leftMatrixMultiplicator = 1;
     RealType rightMatrixMultiplicator = 2;
-/*   
+/*
  *      /  1  2  3  4 \                            /  220  240  260  280  300 \
  *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
  *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
  *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
  *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
  */
-    
+
     mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
-    
+
     EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
     EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
     EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
     EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
     EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
-    
+
     EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
     EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
     EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
     EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
     EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
-    
+
     EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
     EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
     EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
     EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
     EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
-    
+
     EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
     EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
     EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
     EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
     EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
-    
+
     EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
     EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
     EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
@@ -996,36 +1046,36 @@ void test_GetTransposition()
             m.setElement( i, j, value++ );
 
     m.print( std::cout );
-    
+
 /*
  * Sets up the following 2x3 dense matrix:
  *
  *    /  0  0  0 \
  *    \  0  0  0 /
- */ 
+ */
     Matrix mTransposed;
     mTransposed.reset();
     mTransposed.setDimensions( cols, rows );
-    
+
     mTransposed.print( std::cout );
-    
+
     RealType matrixMultiplicator = 1;
-    
+
     mTransposed.getTransposition( m, matrixMultiplicator );
-    
+
     mTransposed.print( std::cout );
-    
+
 /*
  * Should result in the following 2x3 dense matrix:
  *
  *    /  1  3  5 \
  *    \  2  4  6 /
- */ 
-    
+ */
+
     EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
     EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
     EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
-    
+
     EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
     EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
     EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
@@ -1048,66 +1098,114 @@ void test_PerformSORIteration()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0 );
     m.setElement( 0, 2, 1.0 );
     m.setElement( 0, 3, 1.0 );
-        
+
     m.setElement( 1, 0, 1.0 );        // 1st row
     m.setElement( 1, 1, 4.0 );
     m.setElement( 1, 2, 1.0 );
     m.setElement( 1, 3, 1.0 );
-        
+
     m.setElement( 2, 0, 1.0 );
     m.setElement( 2, 1, 1.0 );        // 2nd row
     m.setElement( 2, 2, 4.0 );
     m.setElement( 2, 3, 1.0 );
-        
+
     m.setElement( 3, 0, 1.0 );        // 3rd row
     m.setElement( 3, 1, 1.0 );
     m.setElement( 3, 2, 1.0 );
     m.setElement( 3, 3, 4.0 );
-    
+
     RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
     RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
-    
+
     IndexType row = 0;
     RealType omega = 1;
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ],  1.0 );
     EXPECT_EQ( xVector[ 2 ],  1.0 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ],  1.0 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ],  0.15625 );
     EXPECT_EQ( xVector[ 3 ],  1.0 );
-    
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], -0.5 );
     EXPECT_EQ( xVector[ 1 ], -0.125 );
     EXPECT_EQ( xVector[ 2 ], 0.15625 );
     EXPECT_EQ( xVector[ 3 ], 0.3671875 );
 }
 
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+
 template< typename Matrix >
 void test_SaveAndLoad()
 {
@@ -1124,65 +1222,61 @@ void test_SaveAndLoad()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
-    
+
     Matrix savedMatrix;
     savedMatrix.reset();
     savedMatrix.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < cols; j++ )
             savedMatrix.setElement( i, j, value++ );
-        
+
     ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
-    
+
     Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( rows, cols );
-    
+
     ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
-    
-    std::cout << "\nThis will create a file called '" << TEST_FILE_NAME << "' (of the matrix created in the test function), in .../tnl-dev/Debug/bin/\n\n";
 }
 
 template< typename Matrix >
@@ -1202,33 +1296,33 @@ void test_Print()
  */
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++)
         for( IndexType j = 0; j < cols; j++)
             m.setElement( i, j, value++ );
-    
+
     #include <sstream>
     std::stringstream printed;
     std::stringstream couted;
-    
+
     //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf()); 
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
     m.print( std::cout ); //all the std::cout goes to ss
 
     std::cout.rdbuf(old_buf); //reset
-    
+
     couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
               "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
               "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
               "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
               "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
-    
+
     EXPECT_EQ( printed.str(), couted.str() );
 }
 
@@ -1273,108 +1367,120 @@ using MatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 
+TYPED_TEST( MatrixTest, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
 TYPED_TEST( MatrixTest, setDimensionsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetDimensions< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setLikeTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetLike< MatrixType, MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetRowLength< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetNumberOfMatrixElements< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_GetNumberOfNonzeroMatrixElements< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, resetTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_Reset< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setValueTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetValue< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setElementTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetElement< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addElementTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddElement< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, setRowTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SetRow< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addRowTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddRow< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_VectorProduct< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, addMatrixTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, saveAndLoadTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_SaveAndLoad< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, printTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
-    
+
     test_Print< MatrixType >();
 }
 
@@ -1392,7 +1498,7 @@ TYPED_TEST( MatrixTest, printTest )
 //}
 //#endif
 
-TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
+/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 {
     bool testRan = false;
     EXPECT_TRUE( testRan );
@@ -1496,7 +1602,8 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda )
     std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
 }
 #endif
+ * */
 
-#endif
+#endif // HAVE_GTEST
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h
index 93673a29063db52afcabf59165ea5949471cf1bc..9487e59776b4452f42e894ddc856100edde62b9f 100644
--- a/src/UnitTests/Matrices/DistributedMatrixTest.h
+++ b/src/UnitTests/Matrices/DistributedMatrixTest.h
@@ -13,7 +13,7 @@
 #include <TNL/Communicators/NoDistrCommunicator.h>
 #include <TNL/Matrices/DistributedMatrix.h>
 #include <TNL/Containers/Partitioner.h>
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Legacy/CSR.h>
 
 using namespace TNL;
 
@@ -171,7 +171,7 @@ TYPED_TEST( DistributedMatrixTest, getCompressedRowLengths )
 
    this->matrix.setCompressedRowLengths( this->rowLengths );
    RowLengthsVector output;
-   this->matrix.getCompressedRowLengths( output );
+   this->matrix.getCompressedRowLengths( output ); // TODO: replace this with getRowCapacities
    EXPECT_EQ( output, this->rowLengths );
 }
 
diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..46c6be2cdacbb24648f60aa9e6337f49cd59ad8b
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt
@@ -0,0 +1,72 @@
+IF( BUILD_CUDA )
+   #CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ELSE(  BUILD_CUDA )
+   #ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp )
+   #TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} )
+   #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} )
+
+   ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp )
+   TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} )
+
+ENDIF( BUILD_CUDA )
+
+
+#ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+# TODO: Uncomment the following when AdEllpack works
+#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} )
+# TODO: DenseMatrixTest is not finished
+#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..30b8f64ecfdbf228856d272a71d3de08980f3987
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.cpp  -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..431fe481c2db1d5b18cfa849e882c0ed836463c1
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.cu  -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..aece39d9a848e02ea32f5fc29446a6b1dfbf7f37
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h
@@ -0,0 +1,573 @@
+/***************************************************************************
+                          SparseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
+using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
+using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
+using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;
+
+/*template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;*/
+
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+/*
+ * Sets up the following 10x6 sparse matrix:
+ *
+ *    /  1  2             \
+ *    |           3  4  5 |
+ *    |  6  7  8          |
+ *    |     9 10 11 12 13 |
+ *    | 14 15 16 17 18    |
+ *    | 19 20             |
+ *    | 21                |
+ *    | 22                |
+ *    | 23 24 25 26 27    |
+ *    \                28 /
+ */
+template< typename Matrix >
+void setupUnevenRowSizeMatrix( Matrix& m )
+{
+    const int rows = 10;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 5 );
+    rowLengths.setElement( 0, 2 );
+    rowLengths.setElement( 1,  3 );
+    rowLengths.setElement( 2,  3 );
+    rowLengths.setElement( 5,  2 );
+    rowLengths.setElement( 6,  1 );
+    rowLengths.setElement( 7,  1 );
+    rowLengths.setElement( 9,  1 );
+    m.setCompressedRowLengths( rowLengths );
+
+    int value = 1;
+    for( int i = 0; i < cols - 4; i++ )  // 0th row
+        m.setElement( 0, i, value++ );
+
+    for( int i = 3; i < cols; i++ )      // 1st row
+        m.setElement( 1, i, value++ );
+
+    for( int i = 0; i < cols - 3; i++ )  // 2nd row
+        m.setElement( 2, i, value++ );
+
+    for( int i = 1; i < cols; i++ )      // 3rd row
+        m.setElement( 3, i, value++ );
+
+    for( int i = 0; i < cols - 1; i++ )  // 4th row
+        m.setElement( 4, i, value++ );
+
+    for( int i = 0; i < cols - 4; i++ )  // 5th row
+        m.setElement( 5, i, value++ );
+
+    m.setElement( 6, 0, value++ );   // 6th row
+
+    m.setElement( 7, 0, value++ );   // 7th row
+
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
+        m.setElement( 8, i, value++ );
+
+    m.setElement( 9, 5, value++ );   // 9th row
+}
+
+template< typename Matrix >
+void checkUnevenRowSizeMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 10 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  5 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 20 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 28 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    /              2  1 \
+ *    |           5  4  3 |
+ *    |        8  7  6    |
+ *    |    11 10  9       |
+ *    | 14 13 12          |
+ *    | 16 15             |
+ *    \ 17                /
+ */
+template< typename Matrix >
+void setupAntiTriDiagMatrix( Matrix& m )
+{
+    const int rows = 7;
+    const int cols = 6;
+    m.reset();
+    m.setDimensions( rows, cols );
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setValue( 3 );
+    rowLengths.setElement( 0, 4);
+    rowLengths.setElement( 1,  4 );
+    m.setCompressedRowLengths( rowLengths );
+
+    int value = 1;
+    for( int i = 0; i < rows; i++ )
+        for( int j = cols - 1; j > 2; j-- )
+            if( j - i + 1 < cols && j - i + 1 >= 0 )
+                m.setElement( i, j - i + 1, value++ );
+}
+
+template< typename Matrix >
+void checkAntiTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  1);
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  3 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 17 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+}
+
+/*
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  2             \
+ *    | 3  4  5          |
+ *    |    6  7  8       |
+ *    |       9 10 11    |
+ *    |         12 13 14 |
+ *    |            15 16 |
+ *    \               17 /
+ */
+template< typename Matrix >
+void setupTriDiagMatrix( Matrix& m )
+{
+   const int rows = 7;
+   const int cols = 6;
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, value++ );
+}
+
+template< typename Matrix >
+void checkTriDiagMatrix( Matrix& m )
+{
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 14 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 16 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 17 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+{
+   {
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag2 );
+   }
+   {
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+   {
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+template< typename Matrix1, typename Matrix2 >
+void testConversion()
+{
+
+   {
+        SCOPED_TRACE("Tri Diagonal Matrix");
+
+        Matrix1 triDiag1;
+        setupTriDiagMatrix( triDiag1 );
+        checkTriDiagMatrix( triDiag1 );
+
+        Matrix2 triDiag2;
+        //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
+        triDiag2 = triDiag1;
+        checkTriDiagMatrix( triDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Anti Tri Diagonal Matrix");
+
+        Matrix1 antiTriDiag1;
+        setupAntiTriDiagMatrix( antiTriDiag1 );
+        checkAntiTriDiagMatrix( antiTriDiag1 );
+
+        Matrix2 antiTriDiag2;
+        //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
+        antiTriDiag2 = antiTriDiag1;
+        checkAntiTriDiagMatrix( antiTriDiag2 );
+   }
+
+   {
+        SCOPED_TRACE("Uneven Row Size Matrix");
+        Matrix1 unevenRowSize1;
+        setupUnevenRowSizeMatrix( unevenRowSize1 );
+        checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+        Matrix2 unevenRowSize2;
+        //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
+        unevenRowSize2 = unevenRowSize1;
+        checkUnevenRowSizeMatrix( unevenRowSize2 );
+   }
+}
+
+TEST( SparseMatrixCopyTest, CSR_HostToHost )
+{
+   testCopyAssignment< CSR_host, CSR_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_HostToCuda )
+{
+   testCopyAssignment< CSR_host, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_CudaToHost )
+{
+   testCopyAssignment< CSR_cuda, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_CudaToCuda )
+{
+   testCopyAssignment< CSR_cuda, CSR_cuda >();
+}
+#endif
+
+
+TEST( SparseMatrixCopyTest, Ellpack_HostToHost )
+{
+   testCopyAssignment< E_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, Ellpack_HostToCuda )
+{
+   testCopyAssignment< E_host, E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_CudaToHost )
+{
+   testCopyAssignment< E_cuda, E_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_CudaToCuda )
+{
+   testCopyAssignment< E_cuda, E_cuda >();
+}
+#endif
+
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToHost )
+{
+   testCopyAssignment< SE_host, SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToCuda )
+{
+   testCopyAssignment< SE_host, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToHost )
+{
+   testCopyAssignment< SE_cuda, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
+{
+   testCopyAssignment< SE_cuda, SE_cuda >();
+}
+#endif
+
+
+// test conversion between formats
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host )
+{
+   testConversion< CSR_host, E_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_host )
+{
+   testConversion< E_host, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_host )
+{
+   testConversion< CSR_host, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_host )
+{
+   testConversion< SE_host, CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host )
+{
+   testConversion< E_host, SE_host >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host )
+{
+   testConversion< SE_host, E_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_cuda )
+{
+   testConversion< CSR_cuda, E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_cuda )
+{
+   testConversion< E_cuda, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda )
+{
+   testConversion< CSR_cuda, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda )
+{
+   testConversion< SE_cuda, CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda )
+{
+   testConversion< E_cuda, SE_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
+{
+   testConversion< SE_cuda, E_cuda >();
+}
+#endif
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed8bec79603b41e291246428cb59b9a040a56744
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h
@@ -0,0 +1,39 @@
+/***************************************************************************
+                          SparseMatrixTest.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+
+using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >;
+using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+
+using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >;
+using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+
+TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host )
+{
+    test_PerformSORIteration< CSR_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda )
+{
+   //    test_PerformSORIteration< CSR_cuda_float >();
+}
+#endif
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
similarity index 75%
rename from src/UnitTests/Matrices/SparseMatrixTest.hpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
index ef5b28d240a65c5e26eb987c42b76688c59a8d87..c6ff5cbd7349ed52e65d794b3a4df0c7915ba8e6 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp
@@ -11,14 +11,15 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <iostream>
 
 // Temporary, until test_OperatorEquals doesn't work for all formats.
-#include <TNL/Matrices/ChunkedEllpack.h>
-#include <TNL/Matrices/AdEllpack.h>
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
 template< typename MatrixHostFloat, typename MatrixHostInt >
@@ -36,7 +37,7 @@ void cuda_test_GetType()
     bool testRan = false;
     EXPECT_TRUE( testRan );
     std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;    
+    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
@@ -45,13 +46,13 @@ void test_SetDimensions()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 9;
     const IndexType cols = 8;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     EXPECT_EQ( m.getRows(), 9 );
     EXPECT_EQ( m.getColumns(), 8 );
 }
@@ -62,41 +63,41 @@ void test_SetCompressedRowLengths()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
     const IndexType rows = 10;
     const IndexType cols = 11;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
     rowLengths.setValue( 3 );
-    
+
     IndexType rowLength = 1;
     for( IndexType i = 2; i < rows; i++ )
         rowLengths.setElement( i, rowLength++ );
-    
+
     m.setCompressedRowLengths( rowLengths );
-    
+
     // Insert values into the rows.
     RealType value = 1;
-    
+
     for( IndexType i = 0; i < 3; i++ )      // 0th row
         m.setElement( 0, i, value++ );
-    
+
     for( IndexType i = 0; i < 3; i++ )      // 1st row
         m.setElement( 1, i, value++ );
-    
+
     for( IndexType i = 0; i < 1; i++ )      // 2nd row
         m.setElement( 2, i, value++ );
-    
+
     for( IndexType i = 0; i < 2; i++ )      // 3rd row
         m.setElement( 3, i, value++ );
-        
+
     for( IndexType i = 0; i < 3; i++ )      // 4th row
         m.setElement( 4, i, value++ );
-        
+
     for( IndexType i = 0; i < 4; i++ )      // 5th row
         m.setElement( 5, i, value++ );
 
@@ -111,8 +112,8 @@ void test_SetCompressedRowLengths()
 
     for( IndexType i = 0; i < 8; i++ )      // 9th row
         m.setElement( 9, i, value++ );
-    
-    
+
+
     EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 );
     EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 );
     EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 );
@@ -131,32 +132,101 @@ void test_SetLike()
     using RealType = typename Matrix1::RealType;
     using DeviceType = typename Matrix1::DeviceType;
     using IndexType = typename Matrix1::IndexType;
-        
+
     const IndexType rows = 8;
     const IndexType cols = 7;
-    
+
     Matrix1 m1;
     m1.reset();
     m1.setDimensions( rows + 1, cols + 2 );
-    
+
     Matrix2 m2;
     m2.reset();
     m2.setDimensions( rows, cols );
-    
+
     m1.setLike( m2 );
-    
-    
+
+
     EXPECT_EQ( m1.getRows(), m2.getRows() );
     EXPECT_EQ( m1.getColumns(), m2.getColumns() );
 }
 
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setElement( 0, 4 );
+   rowLengths.setElement( 1, 3 );
+   rowLengths.setElement( 2, 8 );
+   rowLengths.setElement( 3, 2 );
+   for( IndexType i = 4; i < rows - 2; i++ )
+   {
+      rowLengths.setElement( i, 1 );
+   }
+   rowLengths.setElement( 8, 10 );
+   rowLengths.setElement( 9, 10 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+   {
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+   }
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
 template< typename Matrix >
 void test_Reset()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 sparse matrix:
  *
@@ -166,27 +236,253 @@ void test_Reset()
  *    |  0  0  0  0 |
  *    \  0  0  0  0 /
  */
-    
+
     const IndexType rows = 5;
     const IndexType cols = 4;
-    
+
     Matrix m;
     m.setDimensions( rows, cols );
-    
+
     m.reset();
-    
-    
+
+
     EXPECT_EQ( m.getRows(), 0 );
     EXPECT_EQ( m.getColumns(), 0 );
 }
 
+template< typename Matrix >
+void test_GetRow()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+
+/*
+ * Sets up the following 10x10 sparse matrix:
+ *
+ *    /  1  0  2  0  3  0  4  0  0  0  \
+ *    |  5  6  7  0  0  0  0  0  0  0  |
+ *    |  8  9 10 11 12 13 14 15  0  0  |
+ *    | 16 17  0  0  0  0  0  0  0  0  |
+ *    | 18  0  0  0  0  0  0  0  0  0  |
+ *    | 19  0  0  0  0  0  0  0  0  0  |
+ *    | 20  0  0  0  0  0  0  0  0  0  |
+ *    | 21  0  0  0  0  0  0  0  0  0  |
+ *    | 22 23 24 25 26 27 28 29 30 31  |
+ *    \ 32 33 34 35 36 37 38 39 40 41 /
+ */
+
+    const IndexType rows = 10;
+    const IndexType cols = 10;
+
+    Matrix m( rows, cols );
+
+    typename Matrix::CompressedRowLengthsVector rowLengths;
+    rowLengths.setSize( rows );
+    rowLengths.setElement( 0, 4 );
+    rowLengths.setElement( 1, 3 );
+    rowLengths.setElement( 2, 8 );
+    rowLengths.setElement( 3, 2 );
+    for( IndexType i = 4; i < rows - 2; i++ )
+    {
+        rowLengths.setElement( i, 1 );
+    }
+    rowLengths.setElement( 8, 10 );
+    rowLengths.setElement( 9, 10 );
+    m.setCompressedRowLengths( rowLengths );
+
+    /*RealType value = 1;
+    for( IndexType i = 0; i < 4; i++ )
+        m.setElement( 0, 2 * i, value++ );
+
+    for( IndexType i = 0; i < 3; i++ )
+        m.setElement( 1, i, value++ );
+
+    for( IndexType i = 0; i < 8; i++ )
+        m.setElement( 2, i, value++ );
+
+    for( IndexType i = 0; i < 2; i++ )
+        m.setElement( 3, i, value++ );
+
+    for( IndexType i = 4; i < 8; i++ )
+        m.setElement( i, 0, value++ );
+
+    for( IndexType j = 8; j < rows; j++)
+    {
+        for( IndexType i = 0; i < cols; i++ )
+            m.setElement( j, i, value++ );
+    }*/
+    auto matrixView = m.getView();
+    auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+       auto row = matrixView.getRow( rowIdx );
+       RealType val;
+       switch( rowIdx )
+       {
+          case 0:
+            val = 1;
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, val++ );
+            break;
+         case 1:
+            val = 5;
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 2:
+            val = 8;
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 3:
+            val = 16;
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 4:
+            row.setElement( 0, 0, 18 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 19 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 20 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 21 );
+            break;
+         case 8:
+             val = 22;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+         case 9:
+             val = 32;
+             for( IndexType i = 0; i < rows; i++ )
+                row.setElement( i, i, val++ );
+             break;
+       }
+    };
+    TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+    EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+    EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+    EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+    EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+    EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+    EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+    EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+    EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+    EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+    EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+    EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+    EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+    EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+    EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+    EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+    EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+    EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+    EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+    EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+    EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+    EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+    EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+    EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+    EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+    EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+    EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+    EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+    EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+    EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+    EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+    EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+    EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+    EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+    EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+    EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+    EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+    EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+    EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+    EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+    EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+    EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+    EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+
 template< typename Matrix >
 void test_SetElement()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 10x10 sparse matrix:
  *
@@ -201,15 +497,15 @@ void test_SetElement()
  *    | 22 23 24 25 26 27 28 29 30 31  |
  *    \ 32 33 34 35 36 37 38 39 40 41 /
  */
-    
+
     const IndexType rows = 10;
     const IndexType cols = 10;
-    
+
     Matrix m;
     m.reset();
-    
+
     m.setDimensions( rows, cols );
-    
+
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
     rowLengths.setElement( 0, 4 );
@@ -223,29 +519,29 @@ void test_SetElement()
     rowLengths.setElement( 8, 10 );
     rowLengths.setElement( 9, 10 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < 4; i++ )
         m.setElement( 0, 2 * i, value++ );
-    
+
     for( IndexType i = 0; i < 3; i++ )
         m.setElement( 1, i, value++ );
-    
+
     for( IndexType i = 0; i < 8; i++ )
         m.setElement( 2, i, value++ );
-    
+
     for( IndexType i = 0; i < 2; i++ )
         m.setElement( 3, i, value++ );
-    
+
     for( IndexType i = 4; i < 8; i++ )
         m.setElement( i, 0, value++ );
-    
+
     for( IndexType j = 8; j < rows; j++)
     {
         for( IndexType i = 0; i < cols; i++ )
             m.setElement( j, i, value++ );
     }
-    
+
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  0 );
     EXPECT_EQ( m.getElement( 0, 2 ),  2 );
@@ -256,7 +552,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 0, 7 ),  0 );
     EXPECT_EQ( m.getElement( 0, 8 ),  0 );
     EXPECT_EQ( m.getElement( 0, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  5 );
     EXPECT_EQ( m.getElement( 1, 1 ),  6 );
     EXPECT_EQ( m.getElement( 1, 2 ),  7 );
@@ -267,7 +563,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 1, 7 ),  0 );
     EXPECT_EQ( m.getElement( 1, 8 ),  0 );
     EXPECT_EQ( m.getElement( 1, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  8 );
     EXPECT_EQ( m.getElement( 2, 1 ),  9 );
     EXPECT_EQ( m.getElement( 2, 2 ), 10 );
@@ -278,7 +574,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 2, 7 ), 15 );
     EXPECT_EQ( m.getElement( 2, 8 ),  0 );
     EXPECT_EQ( m.getElement( 2, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 16 );
     EXPECT_EQ( m.getElement( 3, 1 ), 17 );
     EXPECT_EQ( m.getElement( 3, 2 ),  0 );
@@ -289,7 +585,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 3, 7 ),  0 );
     EXPECT_EQ( m.getElement( 3, 8 ),  0 );
     EXPECT_EQ( m.getElement( 3, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ), 18 );
     EXPECT_EQ( m.getElement( 4, 1 ),  0 );
     EXPECT_EQ( m.getElement( 4, 2 ),  0 );
@@ -300,7 +596,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 4, 7 ),  0 );
     EXPECT_EQ( m.getElement( 4, 8 ),  0 );
     EXPECT_EQ( m.getElement( 4, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ), 19 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ),  0 );
@@ -311,7 +607,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 5, 7 ),  0 );
     EXPECT_EQ( m.getElement( 5, 8 ),  0 );
     EXPECT_EQ( m.getElement( 5, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 6, 0 ), 20 );
     EXPECT_EQ( m.getElement( 6, 1 ),  0 );
     EXPECT_EQ( m.getElement( 6, 2 ),  0 );
@@ -322,7 +618,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 6, 7 ),  0 );
     EXPECT_EQ( m.getElement( 6, 8 ),  0 );
     EXPECT_EQ( m.getElement( 6, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 7, 0 ), 21 );
     EXPECT_EQ( m.getElement( 7, 1 ),  0 );
     EXPECT_EQ( m.getElement( 7, 2 ),  0 );
@@ -333,7 +629,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 7, 7 ),  0 );
     EXPECT_EQ( m.getElement( 7, 8 ),  0 );
     EXPECT_EQ( m.getElement( 7, 9 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 8, 0 ), 22 );
     EXPECT_EQ( m.getElement( 8, 1 ), 23 );
     EXPECT_EQ( m.getElement( 8, 2 ), 24 );
@@ -344,7 +640,7 @@ void test_SetElement()
     EXPECT_EQ( m.getElement( 8, 7 ), 29 );
     EXPECT_EQ( m.getElement( 8, 8 ), 30 );
     EXPECT_EQ( m.getElement( 8, 9 ), 31 );
-    
+
     EXPECT_EQ( m.getElement( 9, 0 ), 32 );
     EXPECT_EQ( m.getElement( 9, 1 ), 33 );
     EXPECT_EQ( m.getElement( 9, 2 ), 34 );
@@ -363,7 +659,7 @@ void test_AddElement()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 6x5 sparse matrix:
  *
@@ -374,10 +670,10 @@ void test_AddElement()
  *    |  0 11  0  0  0 |
  *    \  0  0  0 12  0 /
  */
-    
+
     const IndexType rows = 6;
     const IndexType cols = 5;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
@@ -385,61 +681,61 @@ void test_AddElement()
     rowLengths.setSize( rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
         m.setElement( 0, i, value++ );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
         m.setElement( 1, i, value++ );
-        
+
     for( IndexType i = 2; i < cols; i++ )         // 2nd row
         m.setElement( 2, i, value++ );
-        
+
     m.setElement( 3, 0, value++ );      // 3rd row
-     
+
     m.setElement( 4, 1, value++ );      // 4th row
- 
+
     m.setElement( 5, 3, value++ );      // 5th row
-    
-        
+
+
     // Check the set elements
     EXPECT_EQ( m.getElement( 0, 0 ),  1 );
     EXPECT_EQ( m.getElement( 0, 1 ),  2 );
     EXPECT_EQ( m.getElement( 0, 2 ),  3 );
     EXPECT_EQ( m.getElement( 0, 3 ),  0 );
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  0 );
     EXPECT_EQ( m.getElement( 1, 1 ),  4 );
     EXPECT_EQ( m.getElement( 1, 2 ),  5 );
     EXPECT_EQ( m.getElement( 1, 3 ),  6 );
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ),  7 );
     EXPECT_EQ( m.getElement( 2, 3 ),  8 );
     EXPECT_EQ( m.getElement( 2, 4 ),  9 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 10 );
     EXPECT_EQ( m.getElement( 3, 1 ),  0 );
     EXPECT_EQ( m.getElement( 3, 2 ),  0 );
     EXPECT_EQ( m.getElement( 3, 3 ),  0 );
     EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ),  0 );
     EXPECT_EQ( m.getElement( 4, 1 ), 11 );
     EXPECT_EQ( m.getElement( 4, 2 ),  0 );
     EXPECT_EQ( m.getElement( 4, 3 ),  0 );
     EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ),  0 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ),  0 );
     EXPECT_EQ( m.getElement( 5, 3 ), 12 );
     EXPECT_EQ( m.getElement( 5, 4 ),  0 );
-    
+
     // Add new elements to the old elements with a multiplying factor applied to the old elements.
 
 /*
@@ -452,7 +748,7 @@ void test_AddElement()
  *    |  0 11  0  0  0 |
  *    \  0  0  0 12  0 /
  */
-    
+
 /*
  * The following setup results in the following 6x5 sparse matrix:
  *
@@ -463,57 +759,57 @@ void test_AddElement()
  *    |  0 35 14 15  0 |
  *    \  0  0 16 41 18 /
  */
-    
+
     RealType newValue = 1;
     for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
         m.addElement( 0, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
         m.addElement( 1, i, newValue++, 2.0 );
-        
+
     for( IndexType i = 2; i < cols; i++ )             // 2nd row
         m.addElement( 2, i, newValue++, 2.0 );
-        
+
     for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
         m.addElement( 3, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
         m.addElement( 4, i, newValue++, 2.0 );
-    
+
     for( IndexType i = 2; i < cols; i++ )             // 5th row
         m.addElement( 5, i, newValue++, 2.0 );
-    
-    
+
+
     EXPECT_EQ( m.getElement( 0, 0 ),  3 );
     EXPECT_EQ( m.getElement( 0, 1 ),  6 );
     EXPECT_EQ( m.getElement( 0, 2 ),  9 );
     EXPECT_EQ( m.getElement( 0, 3 ),  0 );
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ),  0 );
     EXPECT_EQ( m.getElement( 1, 1 ), 12 );
     EXPECT_EQ( m.getElement( 1, 2 ), 15 );
     EXPECT_EQ( m.getElement( 1, 3 ), 18 );
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ), 21 );
     EXPECT_EQ( m.getElement( 2, 3 ), 24 );
     EXPECT_EQ( m.getElement( 2, 4 ), 27 );
-    
+
     EXPECT_EQ( m.getElement( 3, 0 ), 30 );
     EXPECT_EQ( m.getElement( 3, 1 ), 11 );
     EXPECT_EQ( m.getElement( 3, 2 ), 12 );
     EXPECT_EQ( m.getElement( 3, 3 ),  0 );
     EXPECT_EQ( m.getElement( 3, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 4, 0 ),  0 );
     EXPECT_EQ( m.getElement( 4, 1 ), 35 );
     EXPECT_EQ( m.getElement( 4, 2 ), 14 );
     EXPECT_EQ( m.getElement( 4, 3 ), 15 );
     EXPECT_EQ( m.getElement( 4, 4 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 5, 0 ),  0 );
     EXPECT_EQ( m.getElement( 5, 1 ),  0 );
     EXPECT_EQ( m.getElement( 5, 2 ), 16 );
@@ -527,7 +823,7 @@ void test_SetRow()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 3x7 sparse matrix:
  *
@@ -535,10 +831,10 @@ void test_SetRow()
  *    |  2  2  2  0  0  0  0 |
  *    \  3  3  3  0  0  0  0 /
  */
-    
+
     const IndexType rows = 3;
     const IndexType cols = 7;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( rows, cols );
@@ -547,7 +843,7 @@ void test_SetRow()
     rowLengths.setValue( 6 );
     rowLengths.setElement( 1, 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < 3; i++ )
     {
@@ -555,19 +851,19 @@ void test_SetRow()
         m.setElement( 1, i, value + 1 );
         m.setElement( 2, i, value + 2 );
     }
-    
+
     RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 };
     RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 };
     RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 };
-    
+
     RealType row = 0;
     IndexType elements = 3;
-    
+
     m.setRow( row++, colIndexes1, row1, elements );
     m.setRow( row++, colIndexes2, row2, elements );
     m.setRow( row++, colIndexes3, row3, elements );
-    
-    
+
+
     EXPECT_EQ( m.getElement( 0, 0 ), 11 );
     EXPECT_EQ( m.getElement( 0, 1 ), 11 );
     EXPECT_EQ( m.getElement( 0, 2 ), 11 );
@@ -575,7 +871,7 @@ void test_SetRow()
     EXPECT_EQ( m.getElement( 0, 4 ),  0 );
     EXPECT_EQ( m.getElement( 0, 5 ),  0 );
     EXPECT_EQ( m.getElement( 0, 6 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 1, 0 ), 22 );
     EXPECT_EQ( m.getElement( 1, 1 ), 22 );
     EXPECT_EQ( m.getElement( 1, 2 ), 22 );
@@ -583,7 +879,7 @@ void test_SetRow()
     EXPECT_EQ( m.getElement( 1, 4 ),  0 );
     EXPECT_EQ( m.getElement( 1, 5 ),  0 );
     EXPECT_EQ( m.getElement( 1, 6 ),  0 );
-    
+
     EXPECT_EQ( m.getElement( 2, 0 ),  0 );
     EXPECT_EQ( m.getElement( 2, 1 ),  0 );
     EXPECT_EQ( m.getElement( 2, 2 ),  0 );
@@ -600,7 +896,7 @@ void test_VectorProduct()
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
     using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
-    
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -609,10 +905,10 @@ void test_VectorProduct()
  *    |  0  4  0  0 |
  *    \  0  0  5  0 /
  */
-    
+
     const IndexType m_rows_1 = 4;
     const IndexType m_cols_1 = 4;
-    
+
     Matrix m_1;
     m_1.reset();
     m_1.setDimensions( m_rows_1, m_cols_1 );
@@ -623,37 +919,37 @@ void test_VectorProduct()
     rowLengths_1.setElement( 2, 1 );
     rowLengths_1.setElement( 3, 1 );
     m_1.setCompressedRowLengths( rowLengths_1 );
-    
+
     RealType value_1 = 1;
     m_1.setElement( 0, 0, value_1++ );      // 0th row
-    
+
     m_1.setElement( 1, 1, value_1++ );      // 1st row
     m_1.setElement( 1, 3, value_1++ );
-        
+
     m_1.setElement( 2, 1, value_1++ );      // 2nd row
-        
+
     m_1.setElement( 3, 2, value_1++ );      // 3rd row
-    
+
     VectorType inVector_1;
     inVector_1.setSize( m_cols_1 );
-    for( IndexType i = 0; i < inVector_1.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_1.getSize(); i++ )
         inVector_1.setElement( i, 2 );
 
-    VectorType outVector_1;  
+    VectorType outVector_1;
     outVector_1.setSize( m_rows_1 );
     for( IndexType j = 0; j < outVector_1.getSize(); j++ )
         outVector_1.setElement( j, 0 );
- 
-    
+
+
     m_1.vectorProduct( inVector_1, outVector_1 );
-    
-   
+
+
     EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
     EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
     EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
     EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
-    
-    
+
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -662,10 +958,10 @@ void test_VectorProduct()
  *    |  5  6  7  0 |
  *    \  0  8  0  0 /
  */
-    
+
     const IndexType m_rows_2 = 4;
     const IndexType m_cols_2 = 4;
-    
+
     Matrix m_2;
     m_2.reset();
     m_2.setDimensions( m_rows_2, m_cols_2 );
@@ -675,39 +971,39 @@ void test_VectorProduct()
     rowLengths_2.setElement( 1, 1 );
     rowLengths_2.setElement( 3, 1 );
     m_2.setCompressedRowLengths( rowLengths_2 );
-    
+
     RealType value_2 = 1;
     for( IndexType i = 0; i < 3; i++ )   // 0th row
         m_2.setElement( 0, i, value_2++ );
-    
+
     m_2.setElement( 1, 3, value_2++ );      // 1st row
-        
+
     for( IndexType i = 0; i < 3; i++ )   // 2nd row
         m_2.setElement( 2, i, value_2++ );
-        
+
     for( IndexType i = 1; i < 2; i++ )       // 3rd row
         m_2.setElement( 3, i, value_2++ );
-    
+
     VectorType inVector_2;
     inVector_2.setSize( m_cols_2 );
-    for( IndexType i = 0; i < inVector_2.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_2.getSize(); i++ )
         inVector_2.setElement( i, 2 );
 
-    VectorType outVector_2;  
+    VectorType outVector_2;
     outVector_2.setSize( m_rows_2 );
     for( IndexType j = 0; j < outVector_2.getSize(); j++ )
         outVector_2.setElement( j, 0 );
- 
-    
+
+
     m_2.vectorProduct( inVector_2, outVector_2 );
-    
-   
+
+
     EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
     EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
     EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
     EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
-    
-    
+
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -716,10 +1012,10 @@ void test_VectorProduct()
  *    |  7  8  9  0 |
  *    \  0 10 11 12 /
  */
-    
+
     const IndexType m_rows_3 = 4;
     const IndexType m_cols_3 = 4;
-    
+
     Matrix m_3;
     m_3.reset();
     m_3.setDimensions( m_rows_3, m_cols_3 );
@@ -727,40 +1023,40 @@ void test_VectorProduct()
     rowLengths_3.setSize( m_rows_3 );
     rowLengths_3.setValue( 3 );
     m_3.setCompressedRowLengths( rowLengths_3 );
-    
+
     RealType value_3 = 1;
     for( IndexType i = 0; i < 3; i++ )          // 0th row
         m_3.setElement( 0, i, value_3++ );
-    
+
     for( IndexType i = 1; i < 4; i++ )
         m_3.setElement( 1, i, value_3++ );      // 1st row
-        
+
     for( IndexType i = 0; i < 3; i++ )          // 2nd row
         m_3.setElement( 2, i, value_3++ );
-        
+
     for( IndexType i = 1; i < 4; i++ )          // 3rd row
         m_3.setElement( 3, i, value_3++ );
-    
+
     VectorType inVector_3;
     inVector_3.setSize( m_cols_3 );
-    for( IndexType i = 0; i < inVector_3.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_3.getSize(); i++ )
         inVector_3.setElement( i, 2 );
 
-    VectorType outVector_3;  
+    VectorType outVector_3;
     outVector_3.setSize( m_rows_3 );
     for( IndexType j = 0; j < outVector_3.getSize(); j++ )
         outVector_3.setElement( j, 0 );
- 
-    
+
+
     m_3.vectorProduct( inVector_3, outVector_3 );
-    
-   
+
+
     EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
     EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
     EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
     EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
-    
-    
+
+
 /*
  * Sets up the following 8x8 sparse matrix:
  *
@@ -773,10 +1069,10 @@ void test_VectorProduct()
  *    | 26 27 28 29 30  0  0  0 |
  *    \ 31 32 33 34 35  0  0  0 /
  */
-    
+
     const IndexType m_rows_4 = 8;
     const IndexType m_cols_4 = 8;
-    
+
     Matrix m_4;
     m_4.reset();
     m_4.setDimensions( m_rows_4, m_cols_4 );
@@ -787,48 +1083,48 @@ void test_VectorProduct()
     rowLengths_4.setElement( 6, 5 );
     rowLengths_4.setElement( 7, 5 );
     m_4.setCompressedRowLengths( rowLengths_4 );
-    
+
     RealType value_4 = 1;
     for( IndexType i = 0; i < 3; i++ )       // 0th row
         m_4.setElement( 0, i, value_4++ );
-    
+
     m_4.setElement( 0, 5, value_4++ );
-    
+
     for( IndexType i = 1; i < 5; i++ )       // 1st row
         m_4.setElement( 1, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 2nd row
         m_4.setElement( 2, i, value_4++ );
-    
+
     for( IndexType i = 1; i < 5; i++ )       // 3rd row
         m_4.setElement( 3, i, value_4++ );
-    
+
     for( IndexType i = 2; i < 6; i++ )       // 4th row
         m_4.setElement( 4, i, value_4++ );
-    
+
     for( IndexType i = 3; i < 7; i++ )       // 5th row
         m_4.setElement( 5, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 6th row
         m_4.setElement( 6, i, value_4++ );
-    
+
     for( IndexType i = 0; i < 5; i++ )       // 7th row
         m_4.setElement( 7, i, value_4++ );
-    
+
     VectorType inVector_4;
     inVector_4.setSize( m_cols_4 );
-    for( IndexType i = 0; i < inVector_4.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_4.getSize(); i++ )
         inVector_4.setElement( i, 2 );
 
-    VectorType outVector_4;  
+    VectorType outVector_4;
     outVector_4.setSize( m_rows_4 );
     for( IndexType j = 0; j < outVector_4.getSize(); j++ )
         outVector_4.setElement( j, 0 );
-    
-    
+
+
     m_4.vectorProduct( inVector_4, outVector_4 );
-    
-   
+
+
     EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
     EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
     EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
@@ -837,20 +1133,20 @@ void test_VectorProduct()
     EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
     EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
     EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
-    
-  
-/*
- * Sets up the following 8x8 sparse matrix:
- *
- *    /  1  2  3  0  4  5  0  1 \   6
- *    |  0  6  0  7  0  0  0  1 |   3
- *    |  0  8  9  0 10  0  0  1 |   4
- *    |  0 11 12 13 14  0  0  1 |   5
- *    |  0 15  0  0  0  0  0  1 |   2
- *    |  0 16 17 18 19 20 21  1 |   7
- *    | 22 23 24 25 26 27 28  1 |   8
- *    \ 29 30 31 32 33 34 35 36 /   8
- */
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
 
     const IndexType m_rows_5 = 8;
     const IndexType m_cols_5 = 8;
@@ -901,20 +1197,18 @@ void test_VectorProduct()
 
     for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
         m_5.setElement( i, 7, 1);
-    
+
     VectorType inVector_5;
     inVector_5.setSize( m_cols_5 );
-    for( IndexType i = 0; i < inVector_5.getSize(); i++ )        
+    for( IndexType i = 0; i < inVector_5.getSize(); i++ )
         inVector_5.setElement( i, 2 );
 
-    VectorType outVector_5;  
+    VectorType outVector_5;
     outVector_5.setSize( m_rows_5 );
     for( IndexType j = 0; j < outVector_5.getSize(); j++ )
         outVector_5.setElement( j, 0 );
-    
-    
+
     m_5.vectorProduct( inVector_5, outVector_5 );
-    
 
     EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
     EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
@@ -926,13 +1220,118 @@ void test_VectorProduct()
     EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
 }
 
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities( rows );
+   //rowLengths.setSize( rows );
+   rowsCapacities.setElement(0, 6);
+   rowsCapacities.setElement(1, 3);
+   rowsCapacities.setElement(2, 4);
+   rowsCapacities.setElement(3, 5);
+   rowsCapacities.setElement(4, 2);
+   rowsCapacities.setElement(5, 7);
+   rowsCapacities.setElement(6, 8);
+   rowsCapacities.setElement(7, 8);
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );           // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );           // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
 template< typename Matrix >
 void test_PerformSORIteration()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 4x4 sparse matrix:
  *
@@ -941,10 +1340,10 @@ void test_PerformSORIteration()
  *    |  0  1  4  1 |
  *    \  0  0  1  4 /
  */
-    
+
     const IndexType m_rows = 4;
     const IndexType m_cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( m_rows, m_cols );
@@ -952,54 +1351,54 @@ void test_PerformSORIteration()
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0);
-        
+
     m.setElement( 1, 0, 1.0 );        // 1st row
     m.setElement( 1, 1, 4.0 );
     m.setElement( 1, 2, 1.0 );
-        
+
     m.setElement( 2, 1, 1.0 );        // 2nd row
     m.setElement( 2, 2, 4.0 );
     m.setElement( 2, 3, 1.0 );
-        
+
     m.setElement( 3, 2, 1.0 );        // 3rd row
     m.setElement( 3, 3, 4.0 );
-    
+
     RealType bVector [ 4 ] = { 1, 1, 1, 1 };
     RealType xVector [ 4 ] = { 1, 1, 1, 1 };
-    
+
     IndexType row = 0;
     RealType omega = 1;
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 1.0 );
     EXPECT_EQ( xVector[ 2 ], 1.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 1.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 0.0 );
     EXPECT_EQ( xVector[ 3 ], 1.0 );
-    
-    
+
+
     m.performSORIteration( bVector, row++, xVector, omega);
-    
+
     EXPECT_EQ( xVector[ 0 ], 0.0 );
     EXPECT_EQ( xVector[ 1 ], 0.0 );
     EXPECT_EQ( xVector[ 2 ], 0.0 );
@@ -1013,7 +1412,7 @@ void test_OperatorEquals()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   
+
    if( std::is_same< DeviceType, TNL::Devices::Cuda >::value )
        return;
    else
@@ -1059,33 +1458,33 @@ void test_OperatorEquals()
 
         m_host.setElement( 0, 4, value++ );           // 0th row
         m_host.setElement( 0, 5, value++ );
-        
+
         m_host.setElement( 1, 1, value++ );           // 1st row
         m_host.setElement( 1, 3, value++ );
 
         for( IndexType i = 1; i < 3; i++ )            // 2nd row
             m_host.setElement( 2, i, value++ );
-        
+
         m_host.setElement( 2, 4, value++ );           // 2nd row
 
-        
+
         for( IndexType i = 1; i < 5; i++ )            // 3rd row
             m_host.setElement( 3, i, value++ );
 
         m_host.setElement( 4, 1, value++ );           // 4th row
-        
+
         for( IndexType i = 1; i < 7; i++ )            // 5th row
             m_host.setElement( 5, i, value++ );
-        
+
         for( IndexType i = 0; i < 7; i++ )            // 6th row
             m_host.setElement( 6, i, value++ );
-        
+
         for( IndexType i = 0; i < 8; i++ )            // 7th row
             m_host.setElement( 7, i, value++ );
-        
+
         for( IndexType i = 0; i < 7; i++ )            // 1s at the end or rows: 5, 6
             m_host.setElement( i, 7, 1);
-        
+
         EXPECT_EQ( m_host.getElement( 0, 0 ),  1 );
         EXPECT_EQ( m_host.getElement( 0, 1 ),  2 );
         EXPECT_EQ( m_host.getElement( 0, 2 ),  3 );
@@ -1094,7 +1493,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
         EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
         EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
@@ -1103,7 +1502,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
         EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
@@ -1112,7 +1511,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
         EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
@@ -1121,7 +1520,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
         EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
@@ -1130,7 +1529,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
         EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
@@ -1139,7 +1538,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
         EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
         EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
         EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
         EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
@@ -1148,7 +1547,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
         EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
         EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
         EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
         EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
@@ -1178,7 +1577,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 0, 5 ),  5 );
         EXPECT_EQ( m_host.getElement( 0, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 0, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 1, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 1 ),  6 );
         EXPECT_EQ( m_host.getElement( 1, 2 ),  0 );
@@ -1187,7 +1586,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 1, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 1, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 2, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 1 ),  8 );
         EXPECT_EQ( m_host.getElement( 2, 2 ),  9 );
@@ -1196,7 +1595,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 2, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 2, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 3, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 1 ), 11 );
         EXPECT_EQ( m_host.getElement( 3, 2 ), 12 );
@@ -1205,7 +1604,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 3, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 3, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 4, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 1 ), 15 );
         EXPECT_EQ( m_host.getElement( 4, 2 ),  0 );
@@ -1214,7 +1613,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 4, 5 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 6 ),  0 );
         EXPECT_EQ( m_host.getElement( 4, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 5, 0 ),  0 );
         EXPECT_EQ( m_host.getElement( 5, 1 ), 16 );
         EXPECT_EQ( m_host.getElement( 5, 2 ), 17 );
@@ -1223,7 +1622,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 5, 5 ), 20 );
         EXPECT_EQ( m_host.getElement( 5, 6 ), 21 );
         EXPECT_EQ( m_host.getElement( 5, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 6, 0 ), 22 );
         EXPECT_EQ( m_host.getElement( 6, 1 ), 23 );
         EXPECT_EQ( m_host.getElement( 6, 2 ), 24 );
@@ -1232,7 +1631,7 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 6, 5 ), 27 );
         EXPECT_EQ( m_host.getElement( 6, 6 ), 28 );
         EXPECT_EQ( m_host.getElement( 6, 7 ),  1 );
-        
+
         EXPECT_EQ( m_host.getElement( 7, 0 ), 29 );
         EXPECT_EQ( m_host.getElement( 7, 1 ), 30 );
         EXPECT_EQ( m_host.getElement( 7, 2 ), 31 );
@@ -1241,22 +1640,22 @@ void test_OperatorEquals()
         EXPECT_EQ( m_host.getElement( 7, 5 ), 34 );
         EXPECT_EQ( m_host.getElement( 7, 6 ), 35 );
         EXPECT_EQ( m_host.getElement( 7, 7 ), 36 );
-        
+
         // Try vectorProduct with copied cuda matrix to see if it works correctly.
         using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >;
-    
+
         VectorType inVector;
         inVector.setSize( m_cols );
-        for( IndexType i = 0; i < inVector.getSize(); i++ )        
+        for( IndexType i = 0; i < inVector.getSize(); i++ )
             inVector.setElement( i, 2 );
 
-        VectorType outVector;  
+        VectorType outVector;
         outVector.setSize( m_rows );
         for( IndexType j = 0; j < outVector.getSize(); j++ )
             outVector.setElement( j, 0 );
-        
+
         m_cuda.vectorProduct( inVector, outVector );
-        
+
         EXPECT_EQ( outVector.getElement( 0 ),  32 );
         EXPECT_EQ( outVector.getElement( 1 ),  28 );
         EXPECT_EQ( outVector.getElement( 2 ),  56 );
@@ -1274,7 +1673,7 @@ void test_SaveAndLoad( const char* filename )
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-    
+
    /*
     * Sets up the following 4x4 sparse matrix:
     *
@@ -1283,10 +1682,10 @@ void test_SaveAndLoad( const char* filename )
     *    |  6  7  8  0 |
     *    \  0  9 10 11 /
     */
-    
+
     const IndexType m_rows = 4;
     const IndexType m_cols = 4;
-    
+
     Matrix savedMatrix;
     savedMatrix.reset();
     savedMatrix.setDimensions( m_rows, m_cols );
@@ -1294,22 +1693,22 @@ void test_SaveAndLoad( const char* filename )
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     savedMatrix.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
         savedMatrix.setElement( 0, i, value++ );
-        
+
     savedMatrix.setElement( 1, 1, value++ );
     savedMatrix.setElement( 1, 3, value++ );      // 1st row
-        
+
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
         savedMatrix.setElement( 2, i, value++ );
-        
+
     for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
         savedMatrix.setElement( 3, i, value++ );
-        
+
     ASSERT_NO_THROW( savedMatrix.save( filename ) );
-    
+
     Matrix loadedMatrix;
     loadedMatrix.reset();
     loadedMatrix.setDimensions( m_rows, m_cols );
@@ -1317,51 +1716,51 @@ void test_SaveAndLoad( const char* filename )
     rowLengths2.setSize( m_rows );
     rowLengths2.setValue( 3 );
     loadedMatrix.setCompressedRowLengths( rowLengths2 );
-    
-    
+
+
     ASSERT_NO_THROW( loadedMatrix.load( filename ) );
-    
-    
+
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
     EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
     EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
     EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
     EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
     EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
     EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
     EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
-    
+
     EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
     EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
     EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
     EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
-    
+
     EXPECT_EQ( std::remove( filename ), 0 );
 }
 
@@ -1371,7 +1770,7 @@ void test_Print()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    
+
 /*
  * Sets up the following 5x4 sparse matrix:
  *
@@ -1381,10 +1780,10 @@ void test_Print()
  *    |  0  8  9 10 |
  *    \  0  0 11 12 /
  */
-    
+
     const IndexType m_rows = 5;
     const IndexType m_cols = 4;
-    
+
     Matrix m;
     m.reset();
     m.setDimensions( m_rows, m_cols );
@@ -1392,40 +1791,40 @@ void test_Print()
     rowLengths.setSize( m_rows );
     rowLengths.setValue( 3 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     RealType value = 1;
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
         m.setElement( 0, i, value++ );
-    
+
     m.setElement( 1, 3, value++ );      // 1st row
-        
+
     for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
         m.setElement( 2, i, value++ );
-        
+
     for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
         m.setElement( 3, i, value++ );
-        
+
     for( IndexType i = 2; i < m_cols; i++ )       // 4th row
         m.setElement( 4, i, value++ );
-    
+
     #include <sstream>
     std::stringstream printed;
     std::stringstream couted;
-    
+
     //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf()); 
+    auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
     m.print( std::cout ); //all the std::cout goes to ss
 
     std::cout.rdbuf(old_buf); //reset
-    
+
     couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
                "Row: 1 ->  Col:3->4\t\n"
                "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
                "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
                "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
-    
-    
+
+
     EXPECT_EQ( printed.str(), couted.str() );
 }
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
similarity index 85%
rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
index 7effb52cd864fc61c6cc27345694c00c487c0328..8e07205e5e9c012a05e75923c8065c4aabf1717e 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/AdEllpack.h>
+#include <TNL/Matrices/Legacy/AdEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using AdEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::AdEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::AdEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::AdEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::AdEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using AdEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::AdEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::AdEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, int >,
@@ -133,4 +125,4 @@ TYPED_TEST( AdEllpackMatrixTest, printTest )
 #endif
 
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
similarity index 87%
rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
index 33e530be57e6675bac01f735547a79b4731b57a9..c386481071f7ce2e4a209d3457f47a060e8dae98 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/BiEllpack.h>
+#include <TNL/Matrices/Legacy/BiEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
@@ -27,10 +27,6 @@ protected:
 // types for which MatrixTest is instantiated
 using BiEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::BiEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::BiEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::BiEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::BiEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, int >,
@@ -40,11 +36,7 @@ using BiEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::BiEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::BiEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, int >,
@@ -142,4 +134,4 @@ TYPED_TEST( BiEllpackMatrixTest, printTest )
 }
 #endif // HAVE_GTEST
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..258ad2c53831010111eeec9dc240368ae5dffb35
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu
new file mode 100644
index 0000000000000000000000000000000000000000..258ad2c53831010111eeec9dc240368ae5dffb35
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..13c1ed6e00c872623c66fc88d8e57cb94ffb7557
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/CSR.h>
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class CSRMatrixTest : public ::testing::Test
+{
+protected:
+   using CSRMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using CSRMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::CSR< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::CSR< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::CSR< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::CSR< double, TNL::Devices::Host, int >,
+    TNL::Matrices::CSR< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::CSR< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::CSR< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::CSR< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::CSR< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::CSR< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::CSR< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
+
+TYPED_TEST( CSRMatrixTest, setDimensionsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetDimensions< CSRMatrixType >();
+}
+
+//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+//
+////    test_SetCompressedRowLengths< CSRMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( CSRMatrixTest, setLikeTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetLike< CSRMatrixType, CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, resetTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Reset< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetElement< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, addElementTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_AddElement< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, setRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetRow< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, vectorProductTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_VectorProduct< CSRMatrixType >();
+}
+
+TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" );
+}
+
+TYPED_TEST( CSRMatrixTest, printTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Print< CSRMatrixType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu
similarity index 100%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
similarity index 87%
rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
index 6909b53a5304df75aa021484402f1c3986ec9b5f..5d304bde3fff44ffdaf3ccbd812412faa2738824 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/ChunkedEllpack.h>
+#include <TNL/Matrices/Legacy/ChunkedEllpack.h>
 
 #include "SparseMatrixTest.hpp"
 #include <iostream>
@@ -28,10 +28,6 @@ protected:
 // types for which MatrixTest is instantiated
 using ChEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, short >,
     TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Host, int >,
     TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Host, int >,
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, int >,
@@ -41,11 +37,7 @@ using ChEllpackMatrixTypes = ::testing::Types
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Host, long >,
     TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, int >,
+   ,TNL::Matrices::ChunkedEllpack< int,    TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< long,   TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< float,  TNL::Devices::Cuda, int >,
     TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, int >,
@@ -144,4 +136,4 @@ TYPED_TEST( ChunkedEllpackMatrixTest, printTest )
 
 #endif
 
-#include "../main.h"
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c454706f0b1d437b798f2d7a1e93ccf4c0291d3f
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c454706f0b1d437b798f2d7a1e93ccf4c0291d3f
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb9fe4fc76e49a408a6a370ab5b1b9b9c1e56660
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.h -  description
+                             -------------------
+    begin                : Nov 2, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/Ellpack.h>
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class EllpackMatrixTest : public ::testing::Test
+{
+protected:
+   using EllpackMatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using EllpackMatrixTypes = ::testing::Types
+<
+    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Ellpack< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Ellpack< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes );
+
+TYPED_TEST( EllpackMatrixTest, setDimensionsTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetDimensions< EllpackMatrixType >();
+}
+
+//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+//
+////    test_SetCompressedRowLengths< EllpackMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( EllpackMatrixTest, setLikeTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetLike< EllpackMatrixType, EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, resetTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Reset< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, setElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, addElementTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_AddElement< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, setRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SetRow< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, vectorProductTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_VectorProduct< EllpackMatrixType >();
+}
+
+TYPED_TEST( EllpackMatrixTest, saveAndLoadTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack" );
+}
+
+TYPED_TEST( EllpackMatrixTest, printTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Print< EllpackMatrixType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..40e2e94b81ca64051ddceee82f46dd2d20e66e42
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu
new file mode 100644
index 0000000000000000000000000000000000000000..40e2e94b81ca64051ddceee82f46dd2d20e66e42
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu
@@ -0,0 +1 @@
+#include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..8b39583847d765ad2ede230a00eea74caec119f7
--- /dev/null
+++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h
@@ -0,0 +1,136 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.h -  description
+                             -------------------
+    begin                : Dec 9, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+
+#include "SparseMatrixTest.hpp"
+#include <iostream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+// test fixture for typed tests
+template< typename Matrix >
+class SlicedEllpackMatrixTest : public ::testing::Test
+{
+protected:
+   using SlicedEllpackMatrixType = Matrix;
+};
+
+template< typename Real, typename Device, typename Index >
+using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32 >;
+
+
+// types for which MatrixTest is instantiated
+using SlicedEllpackMatrixTypes = ::testing::Types
+<
+    SlicedEllpackType< int,     TNL::Devices::Host, int   >,
+    SlicedEllpackType< long,    TNL::Devices::Host, int   >,
+    SlicedEllpackType< float,   TNL::Devices::Host, int   >,
+    SlicedEllpackType< double,  TNL::Devices::Host, int   >,
+    SlicedEllpackType< int,     TNL::Devices::Host, long  >,
+    SlicedEllpackType< long,    TNL::Devices::Host, long  >,
+    SlicedEllpackType< float,   TNL::Devices::Host, long  >,
+    SlicedEllpackType< double,  TNL::Devices::Host, long  >
+#ifdef HAVE_CUDA
+   ,SlicedEllpackType< int,     TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< long,    TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< float,   TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< double,  TNL::Devices::Cuda, int   >,
+    SlicedEllpackType< int,     TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< long,    TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< float,   TNL::Devices::Cuda, long  >,
+    SlicedEllpackType< double,  TNL::Devices::Cuda, long  >
+#endif
+>;
+
+TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
+
+TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetDimensions< SlicedEllpackMatrixType >();
+}
+
+//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
+//{
+////    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+//
+////    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+//
+//    bool testRan = false;
+//    EXPECT_TRUE( testRan );
+//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+//    std::cout << "      This test is dependent on the input format. \n";
+//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
+//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
+//}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, resetTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Reset< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, addElementTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_AddElement< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, setRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SetRow< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_VectorProduct< SlicedEllpackMatrixType >();
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" );
+}
+
+TYPED_TEST( SlicedEllpackMatrixTest, printTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Print< SlicedEllpackMatrixType >();
+}
+
+#endif
+
+#include "../../main.h"
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..639f1964086784bfdb174443a5f2554b703e511b
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.cpp -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MultidiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..53541edbd003d084e1b50b742472beec086d87fb
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.cu -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "MultidiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..49bcfa11c44d6adb5be20013a2bd2064febd208e
--- /dev/null
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -0,0 +1,1596 @@
+/***************************************************************************
+                          MultidiagonalMatrixTest.h -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <sstream>
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Array.h>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >;
+using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >;
+
+using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >;
+using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >;
+
+static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl";
+
+#ifdef HAVE_GTEST
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+void test_GetSerializationType()
+{
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
+
+   Matrix m;
+   m.setDimensions( rows, cols, diagonalsShifts );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
+}
+
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+   using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
+
+   Matrix1 m1;
+   m1.setDimensions( rows + 1, cols + 2, diagonalsShifts );
+
+   Matrix2 m2;
+   m2.setDimensions( rows, cols, diagonalsShifts );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNonemptyRowsCount()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x8 matrix:
+    *
+    *    /  1  0  0  1  0  1  0  0 \
+    *    |  0  1  0  0  1  0  1  0 |
+    *    |  1  0  1  0  0  1  0  1 |
+    *    |  0  1  0  1  0  0  1  0 |
+    *    \  0  0  1  0  1  0  0  1 /
+    */
+   Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m1.setValue( 1.0 );
+   EXPECT_EQ( m1.getNonemptyRowsCount(), 5 );
+
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  0  0  1  0  \
+    *    |  0  1  0  0  1  |
+    *    |  1  0  1  0  0  |
+    *    |  0  1  0  1  0  |
+    *    \  0  0  1  0  1  /
+    */
+   Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m2.setValue( 1.0 );
+   EXPECT_EQ( m2.getNonemptyRowsCount(), 5 );
+
+   /*
+    * Sets up the following 8x5 matrix:
+    *
+    *    /  1  0  0  1  0  \
+    *    |  0  1  0  0  1  |
+    *    |  1  0  1  0  0  |
+    *    |  0  1  0  1  0  |
+    *    |  0  0  1  0  1  |
+    *    |  0  0  0  1  0  |
+    *    |  0  0  0  0  1  |
+    *    \  0  0  0  0  0  /
+    */
+   Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m3.setValue( 1.0 );
+   EXPECT_EQ( m3.getNonemptyRowsCount(), 7 );
+}
+
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 8x8 matrix:
+    *
+    *    /  0  0  0  1  0  1  0  0 \  -> 2
+    *    |  0  1  0  0  1  0  1  0 |  -> 3
+    *    |  1  0  1  0  0  1  0  1 |  -> 4
+    *    |  0  1  0  1  0  0  1  0 |  -> 3
+    *    |  0  0  1  0  1  0  0  1 |  -> 3
+    *    |  0  0  0  1  0  1  0  0 |  -> 2
+    *    |  0  0  0  0  1  0  1  0 |  -> 2
+    *    \  0  0  0  0  0  1  0  0 /  -> 1
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   m.setValue( 1.0 );
+   m.setElement( 0, 0, 0.0 );
+   m.setElement( 7, 7, 0.0 );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 4, 3, 3, 2, 2, 1 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix >
+void test_GetAllocatedElementsCount()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m1.getAllocatedElementsCount(), 28 );
+
+   Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m2.getAllocatedElementsCount(), 32 );
+
+   Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   EXPECT_EQ( m3.getAllocatedElementsCount(), 32 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  0  0  1  0  1  0 \ -> 2
+    *    |  0  1  0  1  0  1 | -> 3
+    *    |  0  0  1  0  1  0 | -> 2
+    *    |  1  0  0  1  0  1 | -> 3
+    *    |  0  1  0  0  1  0 | -> 2
+    *    |  0  0  1  0  0  1 | -> 2
+    *    \  0  0  0  1  0  0 / -> 1
+    *                           ----
+    *                            15
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   m.setValue( 1.0 );
+   m.setElement( 0, 0, 0.0 );
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) );
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_SetValue()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  1  0  1  0  1  0 \
+    *    |  0  1  0  1  0  1 |
+    *    |  0  0  1  0  1  0 |
+    *    |  1  0  0  1  0  1 |
+    *    |  0  1  0  0  1  0 |
+    *    |  0  0  1  0  0  1 |
+    *    \  0  0  0  1  0  0 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   m.setValue( 1.0 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 0, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 1, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 1, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 1 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 1 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 1 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 1 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 0 );
+}
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  2  0  0  5 \
+    *    |  0  7  8  0  0 |
+    *    |  0  0 13 14  0 |
+    *    | 16  0  0 19 20 |
+    *    \  0 22  0  0 25 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 5;
+   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   Matrix m( rows, cols, diagonals );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( diagonals.containsValue( j - i ) )
+            m.setElement( i, j, value++ );
+         else
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  2  0  0  5 \
+    *    |  0  7  8  0  0 |
+    *    |  0  0 13 14  0 |
+    *    |  0  0  0 19 20 |
+    *    \  0  0  0  0 25 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 5;
+   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   Matrix m( rows, cols, diagonals );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( diagonals.containsValue( j - i ) )
+         {
+            if( j >= i )
+               m.setElement( i, j, value );
+            value++;
+         }
+         else
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  5 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 matrix:
+    *
+    *     /  1  2  0  0  5 \   /  1  2  0  0  5 \    /  3  6  0  0 15 \
+    *     |  0  7  8  0  0 |   |  0  7  8  0  0 |    |  0 21 24  0  0 |
+    * 2 * |  0  0 13 14  0 | + |  0  0 13 14  0 | =  |  0  0 39 42  0 |
+    *     |  0  0  0 19 20 |   | 16  0  0 19 20 |    | 16  0  0 57 60 |
+    *     \  0  0  0  0 25 /   \  0 22  0  0 25 /    \  0 22  0  0 75 /
+    *
+    */
+
+   value = 1;
+   RealType multiplicator = 2;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( diagonals.containsValue( j - i ) )
+            m.addElement( i, j, value++, multiplicator );
+         else
+         {
+            EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error );
+         }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 39 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 57 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 60 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 22 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 75 );
+}
+
+template< typename Matrix >
+void test_SetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x7 matrix:
+    *
+    *    /  1  0  2  0  3  0  0 \
+    *    |  4  5  0  6  0  7  0 |
+    *    |  0  8  9  0 10  0 11 |
+    *    |  0  0 12 13  0 14  0 |
+    *    \  0  0  0 15 16  0 17 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 7;
+
+   Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) );
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 5 ][ 4 ] {
+         {  0,  1,  2,  3 },
+         {  4,  5,  6,  7 },
+         {  8,  9, 10, 11 },
+         { 12, 13, 14,  0 },
+         { 15, 16, 17,  0 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 4; i++ )
+         row.setElement( i, values[ rowIdx ][ i ] );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 11 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 15 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 17 );
+}
+
+template< typename Matrix >
+void test_AddRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 6x5 matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  7  8  9  0 |
+    *    |  0  0 13 14 15 |
+    *    |  0  0  0 19 20 |
+    *    |  0  0  0  0 25 |
+    *    \  0  0  0  0  0 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+   DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } );
+
+   Matrix m( rows, cols, diagonals );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         IndexType offset = j - i;
+         if( diagonals.containsValue( offset ) && offset >= 0)
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 15 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *  / 0  0  0  0  0  0 \   /  1  2  3  0  0 \   / 11  0  0  0  0 \   / 11   0  0   0   0 \
+    *  | 0  1  0  0  0  0 |   |  0  7  8  9  0 |   |  0 22  0  0  0 |   |  0  29  8   9   0 |
+    *  | 0  0  2  0  0  0 | * |  0  0 13 14 15 | + | 33  0 33  0  0 | = | 33   0 59  28  30 |
+    *  | 0  0  0  3  0  0 |   |  0  0  0 19 20 |   |  0 44  0 44  0 |   |  0  44  0 101  60 |
+    *  | 0  0  0  0  4  0 |   |  0  0  0  0 25 |   |  0  0 55  0 55 |   |  0   0 55   0 155 |
+    *  \ 0  0  0  0  0  5 /   \  0  0  0  0  0 /   \  0  0  0 66  0 /   \  0   0  0  66   0 /
+    */
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 4 ] {
+         {  0, 11, 0,  0 },
+         {  0, 22, 0,  0 },
+         { 33, 33, 0,  0 },
+         { 44, 44, 0,  0 },
+         { 55, 55, 0,  0 },
+         { 66,  0, 0,  0 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 4; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+   EXPECT_EQ( m.getElement( 1, 2 ),   8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),   9 );
+   EXPECT_EQ( m.getElement( 1, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  33 );
+   EXPECT_EQ( m.getElement( 2, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  28 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  30  );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  44 );
+   EXPECT_EQ( m.getElement( 3, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  60 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  55 );
+   EXPECT_EQ( m.getElement( 4, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 155 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  66 );
+   EXPECT_EQ( m.getElement( 5, 4 ),   0 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  0  3  0 \
+    *    |  0  6  0  8 |
+    *    |  9  0 11  0 |
+    *    |  0 14  0 16 |
+    *    \  0  0 19  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+   DiagonalsShiftsType diagonals{ -2, 0, 2 };
+
+   Matrix m( rows, cols, diagonals );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( diagonals.containsValue( j - i ) )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   VectorType inVector( 4 );
+   inVector = 2;
+
+   VectorType outVector( 5 );
+   outVector = 0;
+
+   m.vectorProduct( inVector, outVector);
+
+   EXPECT_EQ( outVector.getElement( 0 ),  8 );
+   EXPECT_EQ( outVector.getElement( 1 ), 28 );
+   EXPECT_EQ( outVector.getElement( 2 ), 40 );
+   EXPECT_EQ( outVector.getElement( 3 ), 60 );
+   EXPECT_EQ( outVector.getElement( 4 ), 38 );
+}
+
+template< typename Matrix1, typename Matrix2 = Matrix1 >
+void test_AddMatrix()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+   using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType;
+   using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+   DiagonalsShiftsType1 diagonals1;
+   DiagonalsShiftsType2 diagonals2;
+
+   Matrix1 m( rows, cols, diagonals1 );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  3  4  5  0 |
+    *    |  0  6  7  8 |
+    *    |  0  0  9 10 |
+    *    \  0  0  0 11 /
+    */
+   Matrix2 m2( rows, cols, diagonals2 );
+
+   RealType newValue = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
+            m2.setElement( i, j, newValue++ );
+
+   /*
+    * Compute the following 5x4 matrix:
+    *
+    *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
+    *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
+    *  |  0 10 11 12 | + 2 * |  0  6  7  8 | =  |  0 22 25 28 |
+    *  |  0  0 15 16 |       |  0  0  9 10 |    |  0  0 33 36 |
+    *  \  0  0  0 20 /       \  0  0  0 11 /    \  0  0  0 42 /
+    */
+
+   Matrix1 mResult;
+   mResult.reset();
+   mResult.setDimensions( rows, cols );
+
+   mResult = m;
+
+   RealType matrixMultiplicator = 2;
+   RealType thisMatrixMultiplicator = 1;
+
+   mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+   EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+   EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+   EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+   EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+   EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+   EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+   EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( mResult.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), 11 );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), 14 );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), 17 );
+   EXPECT_EQ( mResult.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), 22 );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), 25 );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), 28 );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), 33 );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), 36 );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), 42 );
+}
+
+template< typename Matrix >
+void test_GetMatrixProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+/*
+ * Sets up the following 5x4 matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType leftRows = 5;
+    const IndexType leftCols = 4;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+
+    Matrix leftMatrix( leftRows, leftCols, diagonalsShifts );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < leftRows; i++ )
+        for( IndexType j = 0; j < leftCols; j++)
+            leftMatrix.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 4x5 matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    \ 16 17 18 19 20 /
+ */
+    const IndexType rightRows = 4;
+    const IndexType rightCols = 5;
+
+    Matrix rightMatrix;
+    rightMatrix.reset();
+    rightMatrix.setDimensions( rightRows, rightCols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rightRows; i++ )
+        for( IndexType j = 0; j < rightCols; j++)
+            rightMatrix.setElement( i, j, newValue++ );
+
+/*
+ * Sets up the following 5x5 resulting matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    Matrix mResult( leftRows, rightCols, diagonalsShifts );
+    mResult.setValue( 0 );
+
+    RealType leftMatrixMultiplicator = 1;
+    RealType rightMatrixMultiplicator = 2;
+/*
+ *      /  1  2  3  4 \                            /  220  240  260  280  300 \
+ *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
+ *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
+ *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
+ *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
+ */
+
+    mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
+    EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
+    EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
+    EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
+    EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 );
+    EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 );
+}
+
+template< typename Matrix >
+void test_GetTransposition()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+/*
+ * Sets up the following 3x2 matrix:
+ *
+ *    /  1  2 \
+ *    |  3  4 |
+ *    \  5  6 /
+ */
+    const IndexType rows = 3;
+    const IndexType cols = 2;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+
+    Matrix m( rows, cols, diagonalsShifts );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.print( std::cout );
+
+/*
+ * Sets up the following 2x3 matrix:
+ *
+ *    /  0  0  0 \
+ *    \  0  0  0 /
+ */
+    Matrix mTransposed( cols, rows, diagonalsShifts );
+
+    mTransposed.print( std::cout );
+
+    RealType matrixMultiplicator = 1;
+
+    mTransposed.getTransposition( m, matrixMultiplicator );
+
+    mTransposed.print( std::cout );
+
+/*
+ * Should result in the following 2x3 matrix:
+ *
+ *    /  1  3  5 \
+ *    \  2  4  6 /
+ */
+
+    EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
+    EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
+
+    EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
+    EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
+    EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
+}
+
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+/*
+ * Sets up the following 4x4 matrix:
+ *
+ *    /  4  1  1  1 \
+ *    |  1  4  1  1 |
+ *    |  1  1  4  1 |
+ *    \  1  1  1  4 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+
+    Matrix m( rows, cols, diagonalsShifts );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0 );
+    m.setElement( 0, 2, 1.0 );
+    m.setElement( 0, 3, 1.0 );
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+    m.setElement( 1, 3, 1.0 );
+
+    m.setElement( 2, 0, 1.0 );
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 0, 1.0 );        // 3rd row
+    m.setElement( 3, 1, 1.0 );
+    m.setElement( 3, 2, 1.0 );
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+    RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ],  1.0 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  0.15625 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ], 0.15625 );
+    EXPECT_EQ( xVector[ 3 ], 0.3671875 );
+}
+
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } );
+   MultidiagonalHost hostMatrix( rows, columns, diagonalsShifts );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j <  columns; j++ )
+         if( diagonalsShifts.containsValue( j - i ) )
+            hostMatrix.setElement( i, j,  i + j );
+
+   Matrix matrix( rows, columns, diagonalsShifts );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+            if( diagonalsShifts.containsValue( j - i ) )
+               EXPECT_EQ( matrix.getElement( i, j ), i + j );
+            else
+               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonalsShifts.containsValue( j - i ) )
+            cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonalsShifts.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+
+template< typename Matrix >
+void test_SaveAndLoad()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 4x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    \  0  0 15 16 /
+    */
+   const IndexType rows = 4;
+   const IndexType cols = 4;
+   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
+
+   Matrix savedMatrix( rows, cols, diagonalsShifts );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( diagonalsShifts.containsValue( j - i ) )
+            savedMatrix.setElement( i, j, value );
+         value++;
+      }
+
+   ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
+
+   Matrix m( rows, cols, diagonalsShifts );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++)
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
+             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
+             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
+             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
+             "Row: 4 ->  Col:3->20\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+    ,TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
+
+TYPED_TEST( MatrixTest, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNonemptyRowsCountTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNonemptyRowsCount< MatrixType >();
+}
+
+
+TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetCompressedRowLengths< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetAllocatedElementsCount< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setValueTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetValue< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+/*TYPED_TEST( MatrixTest, addMatrixTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddMatrix< MatrixType >();
+}*/
+
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+/*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::MultidiagonalMatrixProductKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, Multidiagonal_host_int *, const int, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Multidiagonal_host_int, Matrix2=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1315): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Cuda )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::MultidiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Multidiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_cuda_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1332): here\n\n";
+}
+#endif
+
+TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Host )
+{
+//    test_GetTransposition< Multidiagonal_host_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n";
+    std::cout << "AND this message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionNonAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Cuda )
+{
+//    test_GetTransposition< Multidiagonal_cuda_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test throws the following message: \n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n";
+    std::cout << "          what():  CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n";
+    std::cout << "  Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n";
+    std::cout << "  [1]    4003 abort (core dumped)  ./MultidiagonalMatrixTest-dbg\n";
+}
+#endif
+
+TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Host )
+{
+    test_PerformSORIteration< Multidiagonal_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Cuda )
+{
+//    test_PerformSORIteration< Multidiagonal_cuda_float >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched, this test throws the following message: \n";
+    std::cout << "      [1]    6992 segmentation fault (core dumped)  ./SparseMatrixTest-dbg\n\n";
+    std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
+}
+#endif
+ * */
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 9b09ef4d45cc4ee2e27fb582aa6f56e3de7e09b5..829c30677b2c7e3a0209ed72c01a991ffec56d1c 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -8,18 +8,34 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
-#include <TNL/Matrices/Ellpack.h>
-#include <TNL/Matrices/SlicedEllpack.h>
-
-using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
-using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
-using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
-using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
-using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
-using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;
-
-#ifdef HAVE_GTEST 
+#include <TNL/Matrices/Legacy/CSR.h>
+#include <TNL/Matrices/Legacy/Ellpack.h>
+#include <TNL/Matrices/Legacy/SlicedEllpack.h>
+
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
+#include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+
+template< typename Device, typename Index, typename IndexAllocator >
+using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >;
+
+template< typename Device, typename Index, typename IndexAllocator >
+using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
+
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >;
+using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
+using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+using SE_cuda  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
+
+
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
 /*
@@ -41,7 +57,6 @@ void setupUnevenRowSizeMatrix( Matrix& m )
 {
     const int rows = 10;
     const int cols = 6;
-    m.reset();
     m.setDimensions( rows, cols );
     typename Matrix::CompressedRowLengthsVector rowLengths;
     rowLengths.setSize( rows );
@@ -78,7 +93,7 @@ void setupUnevenRowSizeMatrix( Matrix& m )
 
     m.setElement( 7, 0, value++ );   // 7th row
 
-    for( int i = 0; i < cols - 1; i++ )  // 8th row 
+    for( int i = 0; i < cols - 1; i++ )  // 8th row
         m.setElement( 8, i, value++ );
 
     m.setElement( 9, 5, value++ );   // 9th row
@@ -138,21 +153,21 @@ void checkUnevenRowSizeMatrix( Matrix& m )
    EXPECT_EQ( m.getElement( 6, 3 ),  0 );
    EXPECT_EQ( m.getElement( 6, 4 ),  0 );
    EXPECT_EQ( m.getElement( 6, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 7, 0 ), 22 );
    EXPECT_EQ( m.getElement( 7, 1 ),  0 );
    EXPECT_EQ( m.getElement( 7, 2 ),  0 );
    EXPECT_EQ( m.getElement( 7, 3 ),  0 );
    EXPECT_EQ( m.getElement( 7, 4 ),  0 );
    EXPECT_EQ( m.getElement( 7, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 8, 0 ), 23 );
    EXPECT_EQ( m.getElement( 8, 1 ), 24 );
    EXPECT_EQ( m.getElement( 8, 2 ), 25 );
    EXPECT_EQ( m.getElement( 8, 3 ), 26 );
    EXPECT_EQ( m.getElement( 8, 4 ), 27 );
    EXPECT_EQ( m.getElement( 8, 5 ),  0 );
-   
+
    EXPECT_EQ( m.getElement( 9, 0 ),  0 );
    EXPECT_EQ( m.getElement( 9, 1 ),  0 );
    EXPECT_EQ( m.getElement( 9, 2 ),  0 );
@@ -185,7 +200,7 @@ void setupAntiTriDiagMatrix( Matrix& m )
     rowLengths.setElement( 0, 4);
     rowLengths.setElement( 1,  4 );
     m.setCompressedRowLengths( rowLengths );
-    
+
     int value = 1;
     for( int i = 0; i < rows; i++ )
         for( int j = cols - 1; j > 2; j-- )
@@ -341,81 +356,233 @@ template< typename Matrix1, typename Matrix2 >
 void testCopyAssignment()
 {
    {
-        SCOPED_TRACE("Tri Diagonal Matrix");
-        
-        Matrix1 triDiag1;
-        setupTriDiagMatrix( triDiag1 );
-        checkTriDiagMatrix( triDiag1 );
-        
-        Matrix2 triDiag2;
-        triDiag2 = triDiag1;
-        checkTriDiagMatrix( triDiag2 );
+      SCOPED_TRACE("Tri Diagonal Matrix");
+
+      Matrix1 triDiag1;
+      setupTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag1 );
+
+      Matrix2 triDiag2;
+      triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
+      checkTriDiagMatrix( triDiag2 );
    }
-   
    {
-        SCOPED_TRACE("Anti Tri Diagonal Matrix");
-                
-        Matrix1 antiTriDiag1;
-        setupAntiTriDiagMatrix( antiTriDiag1 );
-        checkAntiTriDiagMatrix( antiTriDiag1 );
-        
-        Matrix2 antiTriDiag2;
-        antiTriDiag2 = antiTriDiag1;
-        checkAntiTriDiagMatrix( antiTriDiag2 );
+      SCOPED_TRACE("Anti Tri Diagonal Matrix");
+      Matrix1 antiTriDiag1;
+      setupAntiTriDiagMatrix( antiTriDiag1 );
+      checkAntiTriDiagMatrix( antiTriDiag1 );
+
+      Matrix2 antiTriDiag2;
+      antiTriDiag2 = antiTriDiag1;
+      checkAntiTriDiagMatrix( antiTriDiag2 );
    }
-   
    {
-        SCOPED_TRACE("Uneven Row Size Matrix");
-        Matrix1 unevenRowSize1;
-        setupUnevenRowSizeMatrix( unevenRowSize1 );
-        checkUnevenRowSizeMatrix( unevenRowSize1 );
-        
-        Matrix2 unevenRowSize2;
-        unevenRowSize2 = unevenRowSize1;
-        checkUnevenRowSizeMatrix( unevenRowSize2 );
+      SCOPED_TRACE("Uneven Row Size Matrix");
+      Matrix1 unevenRowSize1;
+      setupUnevenRowSizeMatrix( unevenRowSize1 );
+      checkUnevenRowSizeMatrix( unevenRowSize1 );
+
+      Matrix2 unevenRowSize2;
+      unevenRowSize2 = unevenRowSize1;
+
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }
 
 template< typename Matrix1, typename Matrix2 >
 void testConversion()
 {
-    
    {
         SCOPED_TRACE("Tri Diagonal Matrix");
-        
+
         Matrix1 triDiag1;
         setupTriDiagMatrix( triDiag1 );
         checkTriDiagMatrix( triDiag1 );
-        
+
         Matrix2 triDiag2;
-        TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 );
+        triDiag2 = triDiag1;
         checkTriDiagMatrix( triDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Anti Tri Diagonal Matrix");
-                
+
         Matrix1 antiTriDiag1;
         setupAntiTriDiagMatrix( antiTriDiag1 );
         checkAntiTriDiagMatrix( antiTriDiag1 );
-        
+
         Matrix2 antiTriDiag2;
-        TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 );
+        antiTriDiag2 = antiTriDiag1;
         checkAntiTriDiagMatrix( antiTriDiag2 );
    }
-   
+
    {
         SCOPED_TRACE("Uneven Row Size Matrix");
         Matrix1 unevenRowSize1;
         setupUnevenRowSizeMatrix( unevenRowSize1 );
         checkUnevenRowSizeMatrix( unevenRowSize1 );
-        
+
         Matrix2 unevenRowSize2;
-        TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 );
+        unevenRowSize2 = unevenRowSize1;
         checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }
 
+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
+
+template< typename Matrix >
+void multidiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
+   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+
+   const IndexType rows( 10 ), columns( 10 );
+   MultidiagonalHost hostMatrix( rows, columns, diagonals );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( diagonals.containsValue( j - i ) )
+            hostMatrix.setElement( i, j, i + j );
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 };
+   /*std::cerr << "hostMatrix " << hostMatrix << std::endl;
+   std::cerr << "matrix " << matrix << std::endl;
+   std::cerr << "rowCapacities " << rowCapacities << std::endl;*/
+
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+
+#ifdef HAVE_CUDA
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonals );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( diagonals.containsValue( j - i ) )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+template< typename Matrix >
+void denseMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix;
+   matrix = hostMatrix;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+}
 
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
@@ -485,8 +652,8 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
 }
 #endif
 
-
-// test conversion between formats
+////
+// Test of conversion between formats
 TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host )
 {
    testConversion< CSR_host, E_host >();
@@ -549,6 +716,108 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
 }
 #endif
 
-#endif
+////
+// Tridiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Multidiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host )
+{
+   multidiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host )
+{
+   multidiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   multidiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda )
+{
+   multidiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   multidiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   multidiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+////
+// Dense matrix assignment test
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
+{
+   denseMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host )
+{
+   denseMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host )
+{
+   denseMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda )
+{
+   denseMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda )
+{
+   denseMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   denseMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+#endif //HAVE_GTEST
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 5baeb42791a526731277adfaa20715a533ab956c..30d3a692d1a843e90600bffa560314535762e7ad 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -1,39 +1,1405 @@
 /***************************************************************************
                           SparseMatrixTest.h -  description
                              -------------------
-    begin                : Nov 2, 2018
+    begin                : Nov 22, 2018
     copyright            : (C) 2018 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
-
-#include "SparseMatrixTest.hpp"
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
 #include <iostream>
+#include <sstream>
 
-#ifdef HAVE_GTEST 
+#ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
-using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >;
-using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
 
-using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >;
-using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
 
-TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host )
+template< typename Matrix >
+void test_Constructors()
 {
-    test_PerformSORIteration< CSR_host_float >();
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   Matrix m1( 5, 6 );
+   EXPECT_EQ( m1.getRows(), 5 );
+   EXPECT_EQ( m1.getColumns(), 6 );
+
+   Matrix m2( {1, 2, 2, 2, 1 }, 5 );
+   typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 };
+   m2.setElement( 0, 0, 1 );   // 0th row
+   m2.setElement( 1, 0, 1 );   // 1st row
+   m2.setElement( 1, 1, 1 );
+   m2.setElement( 2, 1, 1 );   // 2nd row
+   m2.setElement( 2, 2, 1 );
+   m2.setElement( 3, 2, 1 );   // 3rd row
+   m2.setElement( 3, 3, 1 );
+   m2.setElement( 4, 4, 1 );   // 4th row
+   m2.getCompressedRowLengths( v1 );
+
+   EXPECT_EQ( v1, v2 );
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  0  0  0  0 |
+    *    |  0 11  0  0  0 |
+    *    \  0  0  0 12  0 /
+    */
+
+   Matrix m3( 6, 5, {
+      { 0, 0,  1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 },
+      { 1, 1,  4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 },
+      { 2, 2,  7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 },
+      { 3, 0, 10.0 },
+      { 4, 1, 11.0 },
+      { 5, 3, 12.0 } } );
+
+   // Check the set elements
+   EXPECT_EQ( m3.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m3.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m3.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m3.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m3.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m3.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m3.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m3.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m3.getElement( 2, 4 ),  9 );
+
+   EXPECT_EQ( m3.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m3.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m3.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m3.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m3.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m3.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m3.getElement( 5, 4 ),  0 );
 }
 
-#ifdef HAVE_CUDA
-TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda )
+template< typename Matrix >
+void test_SetDimensions()
 {
-   //    test_PerformSORIteration< CSR_cuda_float >();
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
 }
-#endif
 
-#endif
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
+
+   IndexType rowLength = 1;
+   for( IndexType i = 2; i < rows; i++ )
+      rowLengths.setElement( i, rowLength++ );
+
+   m.setCompressedRowLengths( rowLengths );
+
+   // Insert values into the rows.
+   RealType value = 1;
+
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m.setElement( 0, i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )      // 1st row
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 1; i++ )      // 2nd row
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )      // 3rd row
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )      // 4th row
+      m.setElement( 4, i, value++ );
+
+   for( IndexType i = 0; i < 4; i++ )      // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 5; i++ )      // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 6; i++ )      // 7th row
+      m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )      // 8th row
+      m.setElement( 8, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )      // 9th row
+      m.setElement( 9, i, value++ );
+
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
+   m.setCompressedRowLengths( rowLengths );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      RealType val;
+      switch( rowIdx )
+      {
+         case 0:
+            val = 1;
+            for( IndexType i = 0; i < 4; i++ )
+               row.setElement( i, 2 * i, val++ );
+            break;
+         case 1:
+            val = 5;
+            for( IndexType i = 0; i < 3; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 2:
+            val = 8;
+            for( IndexType i = 0; i < 8; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 3:
+            val = 16;
+            for( IndexType i = 0; i < 2; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 4:
+            row.setElement( 0, 0, 18 );
+            break;
+         case 5:
+            row.setElement( 0, 0, 19 );
+            break;
+         case 6:
+            row.setElement( 0, 0, 20 );
+            break;
+         case 7:
+            row.setElement( 0, 0, 21 );
+            break;
+         case 8:
+            val = 22;
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, val++ );
+            break;
+         case 9:
+            val = 32;
+            for( IndexType i = 0; i < rows; i++ )
+               row.setElement( i, i, val++ );
+            break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  2  0  3  0  4  0  0  0  \
+    *    |  5  6  7  0  0  0  0  0  0  0  |
+    *    |  8  9 10 11 12 13 14 15  0  0  |
+    *    | 16 17  0  0  0  0  0  0  0  0  |
+    *    | 18  0  0  0  0  0  0  0  0  0  |
+    *    | 19  0  0  0  0  0  0  0  0  0  |
+    *    | 20  0  0  0  0  0  0  0  0  0  |
+    *    | 21  0  0  0  0  0  0  0  0  0  |
+    *    | 22 23 24 25 26 27 28 29 30 31  |
+    *    \ 32 33 34 35 36 37 38 39 40 41 /
+    */
+
+   const IndexType rows = 10;
+   const IndexType cols = 10;
+
+   Matrix m;
+   m.reset();
+
+   m.setDimensions( rows, cols );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 };
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 0, 2 * i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 0; i < 2; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 8; i++ )
+      m.setElement( i, 0, value++ );
+
+   for( IndexType j = 8; j < rows; j++)
+      for( IndexType i = 0; i < cols; i++ )
+         m.setElement( j, i, value++ );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 5 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 7 ), 15 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 16 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 17 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ), 18 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ), 19 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 20 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ), 21 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ), 22 );
+   EXPECT_EQ( m.getElement( 8, 1 ), 23 );
+   EXPECT_EQ( m.getElement( 8, 2 ), 24 );
+   EXPECT_EQ( m.getElement( 8, 3 ), 25 );
+   EXPECT_EQ( m.getElement( 8, 4 ), 26 );
+   EXPECT_EQ( m.getElement( 8, 5 ), 27 );
+   EXPECT_EQ( m.getElement( 8, 6 ), 28 );
+   EXPECT_EQ( m.getElement( 8, 7 ), 29 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 30 );
+   EXPECT_EQ( m.getElement( 8, 9 ), 31 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ), 32 );
+   EXPECT_EQ( m.getElement( 9, 1 ), 33 );
+   EXPECT_EQ( m.getElement( 9, 2 ), 34 );
+   EXPECT_EQ( m.getElement( 9, 3 ), 35 );
+   EXPECT_EQ( m.getElement( 9, 4 ), 36 );
+   EXPECT_EQ( m.getElement( 9, 5 ), 37 );
+   EXPECT_EQ( m.getElement( 9, 6 ), 38 );
+   EXPECT_EQ( m.getElement( 9, 7 ), 39 );
+   EXPECT_EQ( m.getElement( 9, 8 ), 40 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 41 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  3  0  0 \
+    *    |  0  4  5  6  0 |
+    *    |  0  0  7  8  9 |
+    *    | 10  1  1  0  0 |
+    *    |  0 11  1  1  0 |
+    *    \  0  0  1 12  1 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols, {
+      { 0, 0,  1 }, { 0, 1,  2 }, { 0, 2, 3 },
+                    { 1, 1,  4 }, { 1, 2, 5 }, { 1, 3,  6 },
+                                  { 2, 2, 7 }, { 2, 3,  8 }, { 2, 4, 9 },
+      { 3, 0, 10 }, { 3, 1,  1 }, { 3, 2, 1 },
+                    { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3,  1 },
+                                  { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } );
+   /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )     // 0th row
+      m.setElement( 0, i, value++ );
+
+   for( IndexType i = 1; i < cols - 1; i++ )     // 1st row
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 2; i < cols; i++ )         // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 3, 0, value++ );      // 3rd row
+
+   m.setElement( 4, 1, value++ );      // 4th row
+
+   m.setElement( 5, 3, value++ );      // 5th row*/
+
+
+   // Check the set elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  9 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  1 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  1 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  1 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  3  6  9  0  0 \
+    *    |  0 12 15 18  0 |
+    *    |  0  0 21 24 27 |
+    *    | 30 13 14  0  0 |
+    *    |  0 35 16 17  0 |
+    *    \  0  0 18 41 20 /
+    */
+
+   RealType newValue = 1;
+   for( IndexType i = 0; i < cols - 2; i++ )         // 0th row
+      m.addElement( 0, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 1st row
+      m.addElement( 1, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 2nd row
+      m.addElement( 2, i, newValue++, 2.0 );
+
+   for( IndexType i = 0; i < cols - 2; i++ )         // 3rd row
+      m.addElement( 3, i, newValue++, 2.0 );
+
+   for( IndexType i = 1; i < cols - 1; i++ )         // 4th row
+      m.addElement( 4, i, newValue++, 2.0 );
+
+   for( IndexType i = 2; i < cols; i++ )             // 5th row
+      m.addElement( 5, i, newValue++, 2.0 );
+
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 2, 4 ), 27 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ), 30 );
+   EXPECT_EQ( m.getElement( 3, 1 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ), 35 );
+   EXPECT_EQ( m.getElement( 4, 2 ), 16 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 5, 3 ), 41 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 20 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  2  0  3 |
+    *    |  0  4  0  0 |
+    *    \  0  0  5  0 /
+    */
+
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
 
-#include "../main.h"
+   Matrix m_1;
+   m_1.reset();
+   m_1.setDimensions( m_rows_1, m_cols_1 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 };
+   m_1.setCompressedRowLengths( rowLengths_1 );
+
+   RealType value_1 = 1;
+   m_1.setElement( 0, 0, value_1++ );      // 0th row
+
+   m_1.setElement( 1, 1, value_1++ );      // 1st row
+   m_1.setElement( 1, 3, value_1++ );
+
+   m_1.setElement( 2, 1, value_1++ );      // 2nd row
+
+   m_1.setElement( 3, 2, value_1++ );      // 3rd row
+
+   VectorType inVector_1;
+   inVector_1.setSize( m_cols_1 );
+   for( IndexType i = 0; i < inVector_1.getSize(); i++ )
+       inVector_1.setElement( i, 2 );
+
+   VectorType outVector_1;
+   outVector_1.setSize( m_rows_1 );
+   for( IndexType j = 0; j < outVector_1.getSize(); j++ )
+       outVector_1.setElement( j, 0 );
+
+   m_1.vectorProduct( inVector_1, outVector_1 );
+
+   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
+   EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    \  0  8  0  0 /
+    */
+
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+
+   Matrix m_2( m_rows_2, m_cols_2 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 };
+   m_2.setCompressedRowLengths( rowLengths_2 );
+
+   RealType value_2 = 1;
+   for( IndexType i = 0; i < 3; i++ )      // 0th row
+      m_2.setElement( 0, i, value_2++ );
+
+   m_2.setElement( 1, 3, value_2++ );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )      // 2nd row
+      m_2.setElement( 2, i, value_2++ );
+
+   for( IndexType i = 1; i < 2; i++ )      // 3rd row
+      m_2.setElement( 3, i, value_2++ );
+
+   VectorType inVector_2;
+   inVector_2.setSize( m_cols_2 );
+   for( IndexType i = 0; i < inVector_2.getSize(); i++ )
+      inVector_2.setElement( i, 2 );
+
+   VectorType outVector_2;
+   outVector_2.setSize( m_rows_2 );
+   for( IndexType j = 0; j < outVector_2.getSize(); j++ )
+      outVector_2.setElement( j, 0 );
+
+   m_2.vectorProduct( inVector_2, outVector_2 );
+
+   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_2.getElement( 1 ),  8 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 36 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  5  6 |
+    *    |  7  8  9  0 |
+    *    \  0 10 11 12 /
+    */
+
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
+
+   Matrix m_3( m_rows_3, m_cols_3 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 };
+   m_3.setCompressedRowLengths( rowLengths_3 );
+
+   RealType value_3 = 1;
+   for( IndexType i = 0; i < 3; i++ )          // 0th row
+      m_3.setElement( 0, i, value_3++ );
+
+   for( IndexType i = 1; i < 4; i++ )
+      m_3.setElement( 1, i, value_3++ );      // 1st row
+
+   for( IndexType i = 0; i < 3; i++ )          // 2nd row
+      m_3.setElement( 2, i, value_3++ );
+
+   for( IndexType i = 1; i < 4; i++ )          // 3rd row
+      m_3.setElement( 3, i, value_3++ );
+
+   VectorType inVector_3;
+   inVector_3.setSize( m_cols_3 );
+   for( IndexType i = 0; i < inVector_3.getSize(); i++ )
+      inVector_3.setElement( i, 2 );
+
+   VectorType outVector_3;
+   outVector_3.setSize( m_rows_3 );
+   for( IndexType j = 0; j < outVector_3.getSize(); j++ )
+      outVector_3.setElement( j, 0 );
+
+   m_3.vectorProduct( inVector_3, outVector_3 );
+
+   EXPECT_EQ( outVector_3.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 1 ), 30 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 48 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  0  4  0  0 \
+    *    |  0  5  6  7  8  0  0  0 |
+    *    |  9 10 11 12 13  0  0  0 |
+    *    |  0 14 15 16 17  0  0  0 |
+    *    |  0  0 18 19 20 21  0  0 |
+    *    |  0  0  0 22 23 24 25  0 |
+    *    | 26 27 28 29 30  0  0  0 |
+    *    \ 31 32 33 34 35  0  0  0 /
+    */
+
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
+
+   Matrix m_4( m_rows_4, m_cols_4 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 };
+   m_4.setCompressedRowLengths( rowLengths_4 );
+
+   RealType value_4 = 1;
+   for( IndexType i = 0; i < 3; i++ )       // 0th row
+      m_4.setElement( 0, i, value_4++ );
+
+   m_4.setElement( 0, 5, value_4++ );
+
+   for( IndexType i = 1; i < 5; i++ )       // 1st row
+      m_4.setElement( 1, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 2nd row
+      m_4.setElement( 2, i, value_4++ );
+
+   for( IndexType i = 1; i < 5; i++ )       // 3rd row
+      m_4.setElement( 3, i, value_4++ );
+
+   for( IndexType i = 2; i < 6; i++ )       // 4th row
+      m_4.setElement( 4, i, value_4++ );
+
+   for( IndexType i = 3; i < 7; i++ )       // 5th row
+      m_4.setElement( 5, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 6th row
+      m_4.setElement( 6, i, value_4++ );
+
+   for( IndexType i = 0; i < 5; i++ )       // 7th row
+      m_4.setElement( 7, i, value_4++ );
+
+   VectorType inVector_4;
+   inVector_4.setSize( m_cols_4 );
+   for( IndexType i = 0; i < inVector_4.getSize(); i++ )
+      inVector_4.setElement( i, 2 );
+
+   VectorType outVector_4;
+   outVector_4.setSize( m_rows_4 );
+   for( IndexType j = 0; j < outVector_4.getSize(); j++ )
+      outVector_4.setElement( j, 0 );
+
+   m_4.vectorProduct( inVector_4, outVector_4 );
+
+   EXPECT_EQ( outVector_4.getElement( 0 ),  20 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  52 );
+   EXPECT_EQ( outVector_4.getElement( 2 ), 110 );
+   EXPECT_EQ( outVector_4.getElement( 3 ), 124 );
+   EXPECT_EQ( outVector_4.getElement( 4 ), 156 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 188 );
+   EXPECT_EQ( outVector_4.getElement( 6 ), 280 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
+
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5 );
+   typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m_5.setCompressedRowLengths( rowLengths_5 );
+
+   RealType value_5 = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m_5.setElement( 0, i, value_5++ );
+
+   m_5.setElement( 0, 4, value_5++ );           // 0th row
+   m_5.setElement( 0, 5, value_5++ );
+
+   m_5.setElement( 1, 1, value_5++ );           // 1st row
+   m_5.setElement( 1, 3, value_5++ );
+
+   for( IndexType i = 1; i < 3; i++ )            // 2nd row
+      m_5.setElement( 2, i, value_5++ );
+
+   m_5.setElement( 2, 4, value_5++ );           // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )            // 3rd row
+      m_5.setElement( 3, i, value_5++ );
+
+   m_5.setElement( 4, 1, value_5++ );           // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )            // 5th row
+      m_5.setElement( 5, i, value_5++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 6th row
+      m_5.setElement( 6, i, value_5++ );
+
+   for( IndexType i = 0; i < 8; i++ )            // 7th row
+      m_5.setElement( 7, i, value_5++ );
+
+   for( IndexType i = 0; i < 7; i++ )            // 1s at the end of rows
+      m_5.setElement( i, 7, 1);
+
+   VectorType inVector_5;
+   inVector_5.setSize( m_cols_5 );
+   for( IndexType i = 0; i < inVector_5.getSize(); i++ )
+       inVector_5.setElement( i, 2 );
+
+   VectorType outVector_5;
+   outVector_5.setSize( m_rows_5 );
+   for( IndexType j = 0; j < outVector_5.getSize(); j++ )
+       outVector_5.setElement( j, 0 );
+
+   m_5.vectorProduct( inVector_5, outVector_5 );
+
+   EXPECT_EQ( outVector_5.getElement( 0 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 1 ),  28 );
+   EXPECT_EQ( outVector_5.getElement( 2 ),  56 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 102 );
+   EXPECT_EQ( outVector_5.getElement( 4 ),  32 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );       // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );       // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );       // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );       // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   m.setElement( 0, 0, 4.0 );        // 0th row
+   m.setElement( 0, 1, 1.0);
+
+   m.setElement( 1, 0, 1.0 );        // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
+
+   m.setElement( 2, 1, 1.0 );        // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
+
+   m.setElement( 3, 2, 1.0 );        // 3rd row
+   m.setElement( 3, 3, 4.0 );
+
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+   IndexType row = 0;
+   RealType omega = 1;
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    \  0  9 10 11 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   savedMatrix.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      savedMatrix.setElement( 0, i, value++ );
+
+   savedMatrix.setElement( 1, 1, value++ );
+   savedMatrix.setElement( 1, 3, value++ );      // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      savedMatrix.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      savedMatrix.setElement( 3, i, value++ );
+
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  5 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  9 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 );
+
+   EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  0  0  4 |
+    *    |  5  6  7  0 |
+    *    |  0  8  9 10 |
+    *    \  0  0 11 12 /
+    */
+
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 1, 3, value++ );                // 1st row
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
+      m.setElement( 4, i, value++ );
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+
+   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
+             "Row: 1 ->  Col:3->4\t\n"
+             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
+             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
+             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
index 258ad2c53831010111eeec9dc240368ae5dffb35..5830658abd3135064cc46c8a7c534252a0421935 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
index 258ad2c53831010111eeec9dc240368ae5dffb35..91f0de81a928a6f5676b2d839a35496dfdae61c3 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSR.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
index 3530db46c18753102a09b15908fcc5d34fa66026..a72d548f5bdc98c6fbd7920507b4c1978f58ef00 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
@@ -1,16 +1,18 @@
 /***************************************************************************
                           SparseMatrixTest_CSR.h -  description
                              -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Dec 2, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/CSR.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
 
-#include "SparseMatrixTest.hpp"
+
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
@@ -27,36 +29,35 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::CSR< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< double, TNL::Devices::Host, short >,
-    TNL::Matrices::CSR< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::CSR< double, TNL::Devices::Host, int >,
-    TNL::Matrices::CSR< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::CSR< double, TNL::Devices::Host, long >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::CSR< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::CSR< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::CSR< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::CSR< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::CSR< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::CSR< double, TNL::Devices::Cuda, long >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >
 #endif
 >;
 
 TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes);
 
+TYPED_TEST( CSRMatrixTest, Constructors )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_Constructors< CSRMatrixType >();
+}
+
 TYPED_TEST( CSRMatrixTest, setDimensionsTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
@@ -64,19 +65,12 @@ TYPED_TEST( CSRMatrixTest, setDimensionsTest )
     test_SetDimensions< CSRMatrixType >();
 }
 
-//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
-//{
-////    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-//
-////    test_SetCompressedRowLengths< CSRMatrixType >();
-//
-//    bool testRan = false;
-//    EXPECT_TRUE( testRan );
-//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-//    std::cout << "      This test is dependent on the input format. \n";
-//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
-//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
-//}
+TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_SetCompressedRowLengths< CSRMatrixType >();
+}
 
 TYPED_TEST( CSRMatrixTest, setLikeTest )
 {
@@ -92,6 +86,14 @@ TYPED_TEST( CSRMatrixTest, resetTest )
     test_Reset< CSRMatrixType >();
 }
 
+TYPED_TEST( CSRMatrixTest, getRowTest )
+{
+    using CSRMatrixType = typename TestFixture::CSRMatrixType;
+
+    test_GetRow< CSRMatrixType >();
+}
+
+
 TYPED_TEST( CSRMatrixTest, setElementTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
@@ -106,25 +108,25 @@ TYPED_TEST( CSRMatrixTest, addElementTest )
     test_AddElement< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, setRowTest )
+TYPED_TEST( CSRMatrixTest, vectorProductTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_SetRow< CSRMatrixType >();
+    test_VectorProduct< CSRMatrixType >();
 }
 
-TYPED_TEST( CSRMatrixTest, vectorProductTest )
+TYPED_TEST( CSRMatrixTest, rowsReduction )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_VectorProduct< CSRMatrixType >();
+    test_RowsReduction< CSRMatrixType >();
 }
 
 TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" );
+    test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" );
 }
 
 TYPED_TEST( CSRMatrixTest, printTest )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
index c454706f0b1d437b798f2d7a1e93ccf4c0291d3f..3c30c54c5e8fd6fe5213367e410fd039b4edabb3 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
index c454706f0b1d437b798f2d7a1e93ccf4c0291d3f..9a27cece6e440e42061781c9529660dfac80eacc 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_Ellpack.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_Ellpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
index 979068e02ea2d5b4ed5c3dc4f4db2a566c027934..2a890e694f4ca90edc7aa3b98fba56f666c2097d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h
@@ -1,16 +1,18 @@
 /***************************************************************************
                           SparseMatrixTest_Ellpack.h -  description
                              -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
 
-#include "SparseMatrixTest.hpp"
+
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
@@ -24,38 +26,48 @@ protected:
    using EllpackMatrixType = Matrix;
 };
 
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAlocator >
+using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >;
+
 // types for which MatrixTest is instantiated
 using EllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Host, short >,
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Host, int >,
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Host, long >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::Ellpack< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::Ellpack< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::Ellpack< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, long >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >
 #endif
 >;
 
-TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes );
+TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes);
+
+TYPED_TEST( EllpackMatrixTest, Constructors )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_Constructors< EllpackMatrixType >();
+}
 
 TYPED_TEST( EllpackMatrixTest, setDimensionsTest )
 {
@@ -92,6 +104,13 @@ TYPED_TEST( EllpackMatrixTest, resetTest )
     test_Reset< EllpackMatrixType >();
 }
 
+TYPED_TEST( EllpackMatrixTest, getRowTest )
+{
+    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
+
+    test_GetRow< EllpackMatrixType >();
+}
+
 TYPED_TEST( EllpackMatrixTest, setElementTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
@@ -106,25 +125,25 @@ TYPED_TEST( EllpackMatrixTest, addElementTest )
     test_AddElement< EllpackMatrixType >();
 }
 
-TYPED_TEST( EllpackMatrixTest, setRowTest )
+TYPED_TEST( EllpackMatrixTest, vectorProductTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
 
-    test_SetRow< EllpackMatrixType >();
+    test_VectorProduct< EllpackMatrixType >();
 }
 
-TYPED_TEST( EllpackMatrixTest, vectorProductTest )
+TYPED_TEST( EllpackMatrixTest, rowsReduction )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
 
-    test_VectorProduct< EllpackMatrixType >();
+    test_RowsReduction< EllpackMatrixType >();
 }
 
 TYPED_TEST( EllpackMatrixTest, saveAndLoadTest )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
 
-    test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack" );
+    test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" );
 }
 
 TYPED_TEST( EllpackMatrixTest, printTest )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
index 40e2e94b81ca64051ddceee82f46dd2d20e66e42..2c79ee5027bc9dcbcaad4ddab932976d1eb0c790 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.cpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
index 40e2e94b81ca64051ddceee82f46dd2d20e66e42..bff81d9a3e008385dd13923f9e9aca7281611e55 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu
@@ -1 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_SlicedEllpack.cu -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
 #include "SparseMatrixTest_SlicedEllpack.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
index 0798f59dc49fbb5ada03d975fe60a61ae3e85fcc..17b48dcf461e4b8e99a9d1d9172ded8301b20038 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h
@@ -1,16 +1,19 @@
 /***************************************************************************
                           SparseMatrixTest_SlicedEllpack.h -  description
                              -------------------
-    begin                : Nov 2, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include <TNL/Matrices/SlicedEllpack.h>
+#include <TNL/Containers/Segments/SlicedEllpack.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Matrices/MatrixType.h>
 
-#include "SparseMatrixTest.hpp"
+
+#include "SparseMatrixTest.h"
 #include <iostream>
 
 #ifdef HAVE_GTEST
@@ -24,38 +27,48 @@ protected:
    using SlicedEllpackMatrixType = Matrix;
 };
 
+////
+// Row-major format is used for the host system
+template< typename Device, typename Index, typename IndexAllocator >
+using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >;
+
+
+////
+// Column-major format is used for GPUs
+template< typename Device, typename Index, typename IndexAllocator >
+using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >;
+
 // types for which MatrixTest is instantiated
 using SlicedEllpackMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, short >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, int >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, long >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::SlicedEllpack< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, long >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >
 #endif
 >;
 
-TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes );
+TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes);
+
+TYPED_TEST( SlicedEllpackMatrixTest, Constructors )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_Constructors< SlicedEllpackMatrixType >();
+}
 
 TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
 {
@@ -64,19 +77,12 @@ TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest )
     test_SetDimensions< SlicedEllpackMatrixType >();
 }
 
-//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
-//{
-////    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-//
-////    test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
-//
-//    bool testRan = false;
-//    EXPECT_TRUE( testRan );
-//    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-//    std::cout << "      This test is dependent on the input format. \n";
-//    std::cout << "      Almost every format allocates elements per row differently.\n\n";
-//    std::cout << "\n    TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n";
-//}
+TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest )
+{
+   using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+   test_SetCompressedRowLengths< SlicedEllpackMatrixType >();
+}
 
 TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest )
 {
@@ -92,6 +98,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, resetTest )
     test_Reset< SlicedEllpackMatrixType >();
 }
 
+TYPED_TEST( SlicedEllpackMatrixTest, getRowTest )
+{
+    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
+
+    test_GetRow< SlicedEllpackMatrixType >();
+}
+
 TYPED_TEST( SlicedEllpackMatrixTest, setElementTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
@@ -106,25 +119,25 @@ TYPED_TEST( SlicedEllpackMatrixTest, addElementTest )
     test_AddElement< SlicedEllpackMatrixType >();
 }
 
-TYPED_TEST( SlicedEllpackMatrixTest, setRowTest )
+TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
 
-    test_SetRow< SlicedEllpackMatrixType >();
+    test_VectorProduct< SlicedEllpackMatrixType >();
 }
 
-TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest )
+TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
 
-    test_VectorProduct< SlicedEllpackMatrixType >();
+    test_RowsReduction< SlicedEllpackMatrixType >();
 }
 
 TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
 
-    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack" );
+    test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" );
 }
 
 TYPED_TEST( SlicedEllpackMatrixTest, printTest )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..02fd8c585366f4da12d1218a28adca717dd2cdf2
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -0,0 +1,112 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest.h -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include "SymmetricSparseMatrixTest.hpp"
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes);
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setCompressedRowLengthsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetCompressedRowLengths< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetRow< MatrixType >();
+}
+
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, rowsReduction )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_RowsReduction< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >( saveAndLoadTestFileName );
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..58a4f4fce0e4fdafcaf2c095e6d35875c51d286b
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -0,0 +1,1153 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest.h -  description
+                             -------------------
+    begin                : Feb 7, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/AtomicOperations.h>
+#include <iostream>
+#include <sstream>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+template< typename MatrixHostFloat, typename MatrixHostInt >
+void host_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename MatrixCudaFloat, typename MatrixCudaInt >
+void cuda_test_GetType()
+{
+   bool testRan = false;
+   EXPECT_TRUE( testRan );
+   std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix >
+void test_SetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    /  1  2  4  7                       \
+    |  2  3  5  8 10 13 16 19           |
+    |  4  5  6    11 14       21 24 27  |
+    |  7  8     9       17 20           |
+    |    10 11    12          22 25     |
+    |    13 14       15             28  |
+    |    16    17       18              |
+    |    19    20          21           |
+    |       21    22          23        |
+    |       24    25             26     |
+    \       27       28             30  /
+    */
+   const IndexType rows = 11;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3  };
+   m.setCompressedRowLengths( rowLengths );
+
+   // Insert values into the rows.
+   RealType value = 1;
+
+   // 0th row - lower part
+   m.setElement( 0, 0, value++ );
+
+   // 1st row - lower part
+   m.setElement( 1, 0, value++ );
+   m.setElement( 1, 1, value++ );
+
+   // 2nd row - lower part
+   m.setElement( 2, 0, value++ );
+   m.setElement( 2, 1, value++ );
+   m.setElement( 2, 2, value++ );
+
+   // 3rd row - lower part
+   m.setElement( 3, 0, value++ );
+   m.setElement( 3, 1, value++ );
+   m.setElement( 3, 3, value++ );
+
+   // 4th row - lower part
+   m.setElement( 4, 1, value++ );
+   m.setElement( 4, 2, value++ );
+   m.setElement( 4, 4, value++ );
+
+   // 5th row - lower part
+   m.setElement( 5, 1, value++ );
+   m.setElement( 5, 2, value++ );
+   m.setElement( 5, 5, value++ );
+
+   // 6th row - lower part
+   m.setElement( 6, 1, value++ );
+   m.setElement( 6, 3, value++ );
+   m.setElement( 6, 6, value++ );
+
+   // 7th row - lower part
+   m.setElement( 7, 1, value++ );
+   m.setElement( 7, 3, value++ );
+   m.setElement( 7, 7, value++ );
+
+   // 8th row - lower part
+   m.setElement( 8, 2, value++ );
+   m.setElement( 8, 4, value++ );
+   m.setElement( 8, 8, value++ );
+
+   // 8th row - lower part
+   m.setElement( 9, 2, value++ );
+   m.setElement( 9, 4, value++ );
+   m.setElement( 9, 9, value++ );
+
+   // 8th row - lower part
+   m.setElement( 10,  2, value++ );
+   m.setElement( 10,  5, value++ );
+   m.setElement( 10, 10, value++ );
+
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix1 m1( rows + 1, cols + 2 );
+   Matrix2 m2( rows, cols );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    /  1  2  4  7                       \ -> 4
+    |  2  3  5  8 10 13 16 19           | -> 8
+    |  4  5  6    11 14       21 25 28  | -> 8
+    |  7  8     9       17 20           | -> 5
+    |    10 11    12          23 26     | -> 5
+    |    13 14       15             29  | -> 4
+    |    16    17       18              | -> 3
+    |    19    20          21           | -> 3
+    |       22    23          24        | -> 3
+    |       25    26             27     | -> 3
+    \       28       29             30  / -> 3
+                                          ----
+                                            49
+    */
+
+   const IndexType rows = 11;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols, {
+      { 0, 0,  1 },
+      { 1, 0,  2 }, { 1, 1,  3 },
+      { 2, 0,  4 }, { 2, 1,  5 }, {  2, 2,  6 },
+      { 3, 0,  7 }, { 3, 1,  8 },                { 3, 3,  9 },
+                    { 4, 1, 10 }, {  4, 2, 11 },               { 4, 4, 12 },
+                    { 5, 1, 13 }, {  5, 2, 14 },                              {  5, 5, 15 },
+                    { 6, 1, 16 },                { 6, 3, 17 },                              { 6, 6, 18 },
+                    { 7, 1, 19 },                { 7, 3, 20 },                                            { 7, 7, 21 },
+                                  {  8, 2, 22 },               { 8, 4, 23 },                                           { 8, 8, 24 },
+                                  {  9, 2, 25 },               { 9, 4, 26 },                                                         { 9, 9, 27 },
+                                  { 10, 2, 28 },                              { 10, 4, 29 },                                                      { 10, 10, 30 }
+   } );
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_GetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 11x11 sparse matrix:
+    *
+    /  1  2  4  7                       \
+    |  2  3  5  8 10 13 16 19           |
+    |  4  5  6    11 14       22 25 28  |
+    |  7  8     9       17 20           |
+    |    10 11    12          23 26     |
+    |    13 14       15             29  |
+    |    16    17       18              |
+    |    19    20          21           |
+    |       22    23          24        |
+    |       25    26             27     |
+    \       28       29             30  /
+    */
+
+   Matrix m( { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 11 );
+
+   auto matrixView = m.getView();
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      RealType val;
+      switch( rowIdx )
+      {
+         case  0: row.setElement( 0, 0,  1 ); break;
+         case  1: row.setElement( 0, 0,  2 ); row.setElement( 1, 1,  3 ); break;
+         case  2: row.setElement( 0, 0,  4 ); row.setElement( 1, 1,  5 ); row.setElement( 2,  2,  6 ); break;
+         case  3: row.setElement( 0, 0,  7 ); row.setElement( 1, 1,  8 ); row.setElement( 2,  3,  9 ); break;
+         case  4: row.setElement( 0, 1, 10 ); row.setElement( 1, 2, 11 ); row.setElement( 2,  4, 12 ); break;
+         case  5: row.setElement( 0, 1, 13 ); row.setElement( 1, 2, 14 ); row.setElement( 2,  5, 15 ); break;
+         case  6: row.setElement( 0, 1, 16 ); row.setElement( 1, 3, 17 ); row.setElement( 2,  6, 18 ); break;
+         case  7: row.setElement( 0, 1, 19 ); row.setElement( 1, 3, 20 ); row.setElement( 2,  7, 21 ); break;
+         case  8: row.setElement( 0, 2, 22 ); row.setElement( 1, 4, 23 ); row.setElement( 2,  8, 24 ); break;
+         case  9: row.setElement( 0, 2, 25 ); row.setElement( 1, 4, 26 ); row.setElement( 2,  9, 27 ); break;
+         case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break;
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f );
+
+   EXPECT_EQ( m.getElement( 0,  0 ),  1 );
+   EXPECT_EQ( m.getElement( 0,  1 ),  2 );
+   EXPECT_EQ( m.getElement( 0,  2 ),  4 );
+   EXPECT_EQ( m.getElement( 0,  3 ),  7 );
+   EXPECT_EQ( m.getElement( 0,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 0,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1,  0 ),  2 );
+   EXPECT_EQ( m.getElement( 1,  1 ),  3 );
+   EXPECT_EQ( m.getElement( 1,  2 ),  5 );
+   EXPECT_EQ( m.getElement( 1,  3 ),  8 );
+   EXPECT_EQ( m.getElement( 1,  4 ), 10 );
+   EXPECT_EQ( m.getElement( 1,  5 ), 13 );
+   EXPECT_EQ( m.getElement( 1,  6 ), 16 );
+   EXPECT_EQ( m.getElement( 1,  7 ), 19 );
+   EXPECT_EQ( m.getElement( 1,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 1,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2,  0 ),  4 );
+   EXPECT_EQ( m.getElement( 2,  1 ),  5 );
+   EXPECT_EQ( m.getElement( 2,  2 ),  6 );
+   EXPECT_EQ( m.getElement( 2,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  4 ), 11 );
+   EXPECT_EQ( m.getElement( 2,  5 ), 14 );
+   EXPECT_EQ( m.getElement( 2,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 2,  8 ), 22 );
+   EXPECT_EQ( m.getElement( 2,  9 ), 25 );
+   EXPECT_EQ( m.getElement( 2, 10 ), 28 );
+
+   EXPECT_EQ( m.getElement( 3,  0 ),  7 );
+   EXPECT_EQ( m.getElement( 3,  1 ),  8 );
+   EXPECT_EQ( m.getElement( 3,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  3 ),  9 );
+   EXPECT_EQ( m.getElement( 3,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  6 ), 17 );
+   EXPECT_EQ( m.getElement( 3,  7 ), 20 );
+   EXPECT_EQ( m.getElement( 3,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 3,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  1 ), 10 );
+   EXPECT_EQ( m.getElement( 4,  2 ), 11 );
+   EXPECT_EQ( m.getElement( 4,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  4 ), 12 );
+   EXPECT_EQ( m.getElement( 4,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 4,  8 ), 23 );
+   EXPECT_EQ( m.getElement( 4,  9 ), 26 );
+   EXPECT_EQ( m.getElement( 4, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  1 ), 13 );
+   EXPECT_EQ( m.getElement( 5,  2 ), 14 );
+   EXPECT_EQ( m.getElement( 5,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  5 ), 15 );
+   EXPECT_EQ( m.getElement( 5,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 5,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 10 ), 29 );
+
+   EXPECT_EQ( m.getElement( 6,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  1 ), 16 );
+   EXPECT_EQ( m.getElement( 6,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  3 ), 17 );
+   EXPECT_EQ( m.getElement( 6,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  6 ), 18 );
+   EXPECT_EQ( m.getElement( 6,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 6,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  1 ), 19 );
+   EXPECT_EQ( m.getElement( 7,  2 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  3 ), 20 );
+   EXPECT_EQ( m.getElement( 7,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  7 ), 21 );
+   EXPECT_EQ( m.getElement( 7,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 7,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  2 ), 22 );
+   EXPECT_EQ( m.getElement( 8,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  4 ), 23 );
+   EXPECT_EQ( m.getElement( 8,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 8,  8 ), 24 );
+   EXPECT_EQ( m.getElement( 8,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  2 ), 25 );
+   EXPECT_EQ( m.getElement( 9,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  4 ), 26 );
+   EXPECT_EQ( m.getElement( 9,  5 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 9,  9 ), 27 );
+   EXPECT_EQ( m.getElement( 9, 10 ),  0 );
+
+   EXPECT_EQ( m.getElement( 10,  0 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  1 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  2 ), 28 );
+   EXPECT_EQ( m.getElement( 10,  3 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  4 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  5 ), 29 );
+   EXPECT_EQ( m.getElement( 10,  6 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  7 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  8 ),  0 );
+   EXPECT_EQ( m.getElement( 10,  9 ),  0 );
+   EXPECT_EQ( m.getElement( 10, 10 ), 30 );
+}
+
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 10x10 sparse matrix:
+    *
+    *    /  1  0  0  4  0  0 10  0  0  0  \
+    *    |  0  2  0  5  0  0 11  0  0  0  |
+    *    |  0  0  3  6  0  0 12  0  0  0  |
+    *    |  4  5  6  7  0  0 13  0  0  0  |
+    *    |  0  0  0  0  8  0 14  0  0  0  |
+    *    |  0  0  0  0  0  9 15  0  0  0  |
+    *    | 10 11 12 13 14 15 16  0  0  0  |
+    *    |  0  0  0  0  0  0  0 17  0  0  |
+    *    |  0  0  0  0  0  0  0  0 18  0  |
+    *    \  0  0  0  0  0  0  0  0  0 19 /
+    */
+
+   Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )
+      m.setElement( i, i, value++ );
+
+   for( IndexType i = 0; i < 4; i++ )
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 4; i < 6; i++ )
+      m.setElement( i, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 7; i < 10; i++ )
+      m.setElement( i, i, value++ );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  4 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ), 10 );
+   EXPECT_EQ( m.getElement( 0, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ), 11 );
+   EXPECT_EQ( m.getElement( 1, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  3 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  4 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 6 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  8 );
+   EXPECT_EQ( m.getElement( 4, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 6 ), 14 );
+   EXPECT_EQ( m.getElement( 4, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  9 );
+   EXPECT_EQ( m.getElement( 5, 6 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ), 10 );
+   EXPECT_EQ( m.getElement( 6, 1 ), 11 );
+   EXPECT_EQ( m.getElement( 6, 2 ), 12 );
+   EXPECT_EQ( m.getElement( 6, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 6, 4 ), 14 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 15 );
+   EXPECT_EQ( m.getElement( 6, 6 ), 16 );
+   EXPECT_EQ( m.getElement( 6, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 7, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 7 ), 17 );
+   EXPECT_EQ( m.getElement( 7, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 7, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 8, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 8, 8 ), 18 );
+   EXPECT_EQ( m.getElement( 8, 9 ),  0 );
+
+   EXPECT_EQ( m.getElement( 9, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 6 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 7 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 8 ),  0 );
+   EXPECT_EQ( m.getElement( 9, 9 ), 19 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  2  3  4  0  0 |
+    *    |  0  4  5  6  0 |
+    *    |  0  0  6  7  8 |
+    *    |  0  0  0  8  9 |
+    *    \  0  0  0  0 10 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( 6, 5, {
+      { 0, 0, 1 },
+      { 1, 0, 2 }, { 1, 1, 3 },
+                   { 2, 1, 4 }, { 2, 2, 5 },
+                                { 3, 2, 6 }, { 3, 3, 7 },
+                                             { 4, 3, 8 }, { 4, 4,  9 },
+                                                          { 5, 4, 10 } } );
+
+   // Check the set elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 3, 4 ),  8 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 4, 4 ),  9 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 10 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *    /  1  2  0  0  0 \   /  0  1  0  0  0 \   /  2  5  0  0  0 \
+    *    |  2  3  4  0  0 |   |  1  0  1  0  0 |   |  5  6  9  0  0 |
+    * 2  |  0  4  5  6  0 | + |  0  1  0  1  0 | = |  0  9 10 13  0 |
+    *    |  0  0  6  7  8 |   |  0  0  1  0  1 |   |  0  0 13 14 17 |
+    *    |  0  0  0  8  9 |   |  0  0  0  1  0 |   |  0  0  0 17 18 |
+    *    \  0  0  0  0 10 /   \  0  0  0  0  1 /   \  0  0  0  0 21 /
+    */
+
+   for( IndexType i = 0; i < rows; i++ )
+   {
+      if( i > 0 )
+         m.addElement( i, i - 1, 1.0, 2.0 );
+      if( i < cols )
+         m.addElement( i, i, 0.0, 2.0 );
+   }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 17 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 17 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 18 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 21 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   /**
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  0  0  0 \
+    *    |  0  2  3  4 |
+    *    |  0  3  0  5 |
+    *    \  0  4  5  0 /
+    */
+
+   const IndexType m_rows_1 = 4;
+   const IndexType m_cols_1 = 4;
+
+   Matrix m_1( m_rows_1, m_cols_1, {
+      { 0, 0, 1 },
+                   { 1, 1, 2 },
+                   { 2, 1, 3 },
+                   { 3, 1, 4 }, { 3, 2, 5 } } );
+
+   VectorType inVector_1( m_cols_1, 2.0 );
+   VectorType outVector_1( m_rows_1, 0.0 );
+   m_1.vectorProduct( inVector_1, outVector_1 );
+
+   EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
+   EXPECT_EQ( outVector_1.getElement( 1 ), 18 );
+   EXPECT_EQ( outVector_1.getElement( 2 ), 16 );
+   EXPECT_EQ( outVector_1.getElement( 3 ), 18 );
+
+   /**
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  2  0  6  8 |
+    *    |  3  6  7  0 |
+    *    \  0  8  0  9 /
+    */
+
+   const IndexType m_rows_2 = 4;
+   const IndexType m_cols_2 = 4;
+
+   Matrix m_2( m_rows_2, m_cols_2, {
+      { 0, 0, 1 },
+      { 1, 0, 2 },
+      { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 },
+                   { 3, 1, 8 },              { 3, 3, 9 } } );
+
+   VectorType inVector_2( m_cols_2, 2 );
+   VectorType outVector_2( m_rows_2, 0 );
+   m_2.vectorProduct( inVector_2, outVector_2 );
+
+   EXPECT_EQ( outVector_2.getElement( 0 ), 12 );
+   EXPECT_EQ( outVector_2.getElement( 1 ), 32 );
+   EXPECT_EQ( outVector_2.getElement( 2 ), 32 );
+   EXPECT_EQ( outVector_2.getElement( 3 ), 34 );
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  1  2  3  4 \
+    *    |  2  5  0  0 |
+    *    |  3  0  6  0 |
+    *    \  4  0  0  7 /
+    */
+
+   const IndexType m_rows_3 = 4;
+   const IndexType m_cols_3 = 4;
+
+   Matrix m_3( m_rows_3, m_cols_3, {
+      { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, { 0, 3, 4 },
+      { 1, 0, 2 }, { 1, 1, 5 },
+      { 2, 0, 3 }, { 2, 2, 6 },
+      { 3, 0, 4 }, { 3, 3, 7 }
+   } );
+
+   VectorType inVector_3( { 0, 1, 2, 3 } );
+   VectorType outVector_3( m_rows_3, 0 );
+   m_3.vectorProduct( inVector_3, outVector_3 );
+
+   EXPECT_EQ( outVector_3.getElement( 0 ), 20 );
+   EXPECT_EQ( outVector_3.getElement( 1 ),  5 );
+   EXPECT_EQ( outVector_3.getElement( 2 ), 12 );
+   EXPECT_EQ( outVector_3.getElement( 3 ), 21 );
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  0  3  0  9  0 15  0 \
+    *    |  0  2  0  6  0 12  0 19 |
+    *    |  3  0  5  0 10  0 16  0 |
+    *    |  0  6  0  8  0 13  0 20 |
+    *    |  9  0 10  0 11  0 17  0 |
+    *    |  0 12  0 13  0 14  0 21 |
+    *    | 15  0 16  0 17  0 18  0 |
+    *    \  0 19  0 20  0 21  0 22 /
+    */
+
+   const IndexType m_rows_4 = 8;
+   const IndexType m_cols_4 = 8;
+
+   Matrix m_4( m_rows_4, m_cols_4, {
+      { 0, 0,  1 },
+                    { 1, 1,  2 },
+      { 2, 0,  3 },               { 2, 2, 5 },
+                    { 3, 1,  6 },               { 3, 3, 8 },
+      { 4, 0,  9 },               { 4, 2, 10 },               { 4, 4, 11 },
+                    { 5, 1, 12 },               { 5, 3, 13 },               { 5, 5, 14 },
+      { 6, 0, 15 },               { 6, 2, 16 },               { 6, 4, 17 },               { 6, 6, 18 },
+                    { 7, 1, 19 },               { 7, 3, 20 },               { 7, 5, 21 },               { 7, 7, 22 }
+   } );
+
+   VectorType inVector_4 { 1, 2, 1, 2, 1, 2, 1, 2 };
+   VectorType outVector_4( m_rows_4, 0 );
+   m_4.vectorProduct( inVector_4, outVector_4 );
+
+   EXPECT_EQ( outVector_4.getElement( 0 ),  28 );
+   EXPECT_EQ( outVector_4.getElement( 1 ),  78 );
+   EXPECT_EQ( outVector_4.getElement( 2 ),  34 );
+   EXPECT_EQ( outVector_4.getElement( 3 ),  94 );
+   EXPECT_EQ( outVector_4.getElement( 4 ),  47 );
+   EXPECT_EQ( outVector_4.getElement( 5 ), 120 );
+   EXPECT_EQ( outVector_4.getElement( 6 ),  66 );
+   EXPECT_EQ( outVector_4.getElement( 7 ), 164 );
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  0  0  0  0  0  0  0 \
+    *    |  0  2  0  0  0  0  0  0 |
+    *    |  0  0  3  4  6  9  0  0 |
+    *    |  0  0  4  5  7 10  0  0 |
+    *    |  0  0  6  7  8 11  0  0 |
+    *    |  0  0  9 10 11 12  0  0 |
+    *    |  0  0  0  0  0  0 13  0 |
+    *    \  0  0  0  0  0  0  0 14 /
+    */
+
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5,{
+      { 0, 0, 1 },
+                   { 1, 1, 2, },
+                                 { 2, 2, 3 },
+                                 { 3, 2, 4 }, { 3, 3,  5 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 },
+                                 { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
+                                                                                        { 6, 6, 13 },
+                                                                                                      { 7, 7, 14 }
+   } );
+
+   VectorType inVector_5( { 1, 2, 3, 4, 5, 6, 7, 8 } );
+   VectorType outVector_5( m_rows_5, 0.0 );
+   m_5.vectorProduct( inVector_5, outVector_5 );
+
+   EXPECT_EQ( outVector_5.getElement( 0 ), 1*1 );
+   EXPECT_EQ( outVector_5.getElement( 1 ), 2*2 );
+   EXPECT_EQ( outVector_5.getElement( 2 ), 3*3 + 4*4  + 5*6  + 6*9 );
+   EXPECT_EQ( outVector_5.getElement( 3 ), 3*4 + 4*5  + 5*7  + 6*10 );
+   EXPECT_EQ( outVector_5.getElement( 4 ), 3*6 + 4*7  + 5*8  + 6*11 );
+   EXPECT_EQ( outVector_5.getElement( 5 ), 3*9 + 4*10 + 5*11 + 6*12 );
+   EXPECT_EQ( outVector_5.getElement( 6 ), 7*13 );
+   EXPECT_EQ( outVector_5.getElement( 7 ), 8*14 );
+}
+
+template< typename Matrix >
+void test_RowsReduction()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 sparse matrix:
+    *
+    *    /  1  0  0  0  0  0  0  0 \
+    *    |  0  2  0  0  0  0  0  0 |
+    *    |  0  0  3  4  6  9  0  0 |
+    *    |  0  0  4  5  7 10  0  0 |
+    *    |  0  0  6  7  8 11  0  0 |
+    *    |  0  0  9 10 11 12  0  0 |
+    *    |  0  0  0  0  0  0 13  0 |
+    *    \  0  0  0  0  0  0  0 14 /
+    */
+
+   const IndexType m_rows_5 = 8;
+   const IndexType m_cols_5 = 8;
+
+   Matrix m_5( m_rows_5, m_cols_5,{
+      { 0, 0, 1 },
+                   { 1, 1, 2, },
+                                 { 2, 2, 3 },
+                                 { 3, 2, 4 }, { 3, 3,  5 },
+                                 { 4, 2, 6 }, { 4, 3,  7 }, { 4, 4,  8 },
+                                 { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 },
+                                                                                        { 6, 6, 13 },
+                                                                                                      { 7, 7, 14 }
+   } );
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 );
+   typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } );
+   auto rowLengths_view = rowLengths.getView();
+   rowLengths_view = 0;
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> IndexType {
+      if( value != 0.0 && row != column)
+         TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 );
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] += value;
+   };
+   m_5.allRowsReduction( fetch, reduce, keep, 0 );
+
+   EXPECT_EQ( rowLengths_true, rowLengths );
+   m_5.getCompressedRowLengths( rowLengths );
+   typename Matrix::RowsCapacitiesType rowLengths_symmetric( { 1, 1, 1, 2, 3, 4, 1, 1 } );
+   EXPECT_EQ( rowLengths_symmetric, rowLengths );
+
+   ////
+   // Compute max norm
+   /*TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36*/
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols, {
+      { 0, 0, 4 }, { 0, 1, 1 },
+      { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 },
+                   { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 },
+                                { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 },
+                                             { 4, 3, 1 }, { 4, 4, 4 }
+   } );
+
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+   IndexType row = 0;
+   RealType omega = 1;
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega);
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 sparse matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  2  3  4  0  0 |
+    *    |  0  4  5  6  0 |
+    *    |  0  0  6  7  8 |
+    *    |  0  0  0  8  9 |
+    *    \  0  0  0  0 10 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix savedMatrix( 6, 5, {
+      { 0, 0, 1 },
+      { 1, 0, 2 }, { 1, 1, 3 },
+                   { 2, 1, 4 }, { 2, 2, 5 },
+                                { 3, 2, 6 }, { 3, 3, 7 },
+                                             { 4, 3, 8 }, { 4, 4,  9 },
+                                                          { 5, 4, 10 } } );
+
+   // Check the set elements
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ),  4 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 4 ),  8 );
+
+   EXPECT_EQ( savedMatrix.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 3 ),  8 );
+   EXPECT_EQ( savedMatrix.getElement( 4, 4 ),  9 );
+
+   EXPECT_EQ( savedMatrix.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 5, 4 ), 10 );
+
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) );
+   EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) );
+   EXPECT_EQ( std::remove( filename ), 0 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    *    /  4  1  0  0 \
+    *    |  1  4  1  0 |
+    *    |  0  1  4  1 |
+    *    \  0  0  1  4 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols, {
+      { 0, 0, 4 },
+      { 1, 0, 1 }, { 1, 1, 4 },
+                   { 2, 1, 1 }, { 2, 2, 4 },
+                                { 3, 2, 1 }, { 3, 3, 4 }
+   } );
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+
+   couted << "Row: 0 ->  Col:0->4	 Col:1->1\t\n"
+             "Row: 1 ->  Col:0->1	 Col:1->4	 Col:2->1\t\n"
+             "Row: 2 ->  Col:1->1	 Col:2->4	 Col:3->1\t\n"
+             "Row: 3 ->  Col:2->1	 Col:3->4\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+#endif
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c23fa4242090ca3c441df81f4fbd6b1583b833d2
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.cpp -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SymmetricSparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu
new file mode 100644
index 0000000000000000000000000000000000000000..df1d83da0e08cefc0bc314e01ec216bda1905f4a
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.cu -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SymmetricSparseMatrixTest_CSR.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6f7ec95a3a93aa6917d88a0f014ddbc44ca92ef
--- /dev/null
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -0,0 +1,61 @@
+/***************************************************************************
+                          SymmetricSparseMatrixTest_CSR.h -  description
+                             -------------------
+    begin                : Feb 11, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+#include <TNL/Containers/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+// test fixture for typed tests
+//template< typename Matrix >
+//class MatrixTest : public ::testing::Test
+//{
+//protected:
+//   using MatrixType = Matrix;
+//};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
+   ,//TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+#endif // HAVE_CUDA
+>;
+
+const char* saveAndLoadTestFileName = "test_SymmetricSparseMatrixTest_CSR_segments";
+
+#include "SymmetricSparseMatrixTest.h"
+
+#endif // HAVE_GTEST
+
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3248d352654f119d3a8d6cb2290e92b320d0dc97
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          TridiagonalMatrixTest.cpp -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "TridiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..16f909fa78a3725ee9040299be7fe2ec6908514d
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          TridiagonalMatrixTest.cu -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "TridiagonalMatrixTest.h"
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..d9dc06599981ca920780bfe52e35e09e8f65f854
--- /dev/null
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -0,0 +1,1636 @@
+/***************************************************************************
+                          TridiagonalMatrixTest.h -  description
+                             -------------------
+    begin                : Jan 8, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <sstream>
+#include <TNL/Devices/Host.h>
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Containers/Array.h>
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Math.h>
+#include <iostream>
+
+using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >;
+using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >;
+
+using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >;
+using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >;
+
+static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl";
+
+#ifdef HAVE_GTEST
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+void test_GetSerializationType()
+{
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+}
+
+template< typename Matrix >
+void test_SetDimensions()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 9;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+
+   EXPECT_EQ( m.getRows(), 9 );
+   EXPECT_EQ( m.getColumns(), 8 );
+}
+
+template< typename Matrix1, typename Matrix2 >
+void test_SetLike()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix1 m1;
+   m1.reset();
+   m1.setDimensions( rows + 1, cols + 2 );
+
+   Matrix2 m2;
+   m2.reset();
+   m2.setDimensions( rows, cols );
+
+   m1.setLike( m2 );
+
+   EXPECT_EQ( m1.getRows(), m2.getRows() );
+   EXPECT_EQ( m1.getColumns(), m2.getColumns() );
+}
+
+template< typename Matrix >
+void test_GetCompressedRowLengths()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 10;
+   const IndexType cols = 11;
+
+   Matrix m( rows, cols );
+
+   // Insert values into the rows.
+   RealType value = 1;
+
+   for( IndexType i = 0; i < 2; i++ )  // 0th row -> 2 elements
+      m.setElement( 0, i, value++ );
+
+   for( IndexType i = 0; i < 3; i++ )  // 1st row -> 3 elements
+      m.setElement( 1, i, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )  // 2nd row -> 2 elements
+      m.setElement( 2, i, value++ );
+
+   for( IndexType i = 2; i < 5; i++ )  // 3rd row -> 3 elements
+      m.setElement( 3, i, value++ );
+
+   for( IndexType i = 3; i < 6; i++ )  // 4th row -> 3 elements
+      m.setElement( 4, i, value++ );
+
+   for( IndexType i = 4; i < 6; i++ )  // 5th row -> 2 elements
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 5; i < 8; i++ )  // 6th row -> 3 elements
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 6; i < 8; i++ )  // 7th row -> 2 elements
+      m.setElement( 7, i, value++ );
+
+   for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements
+      m.setElement( 8, i, value++ );
+
+   for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements
+      m.setElement( 9, i, value++ );
+
+   typename Matrix::CompressedRowLengthsVector rowLengths( rows );
+   rowLengths = 0;
+   m.getCompressedRowLengths( rowLengths );
+   typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 };
+   EXPECT_EQ( rowLengths, correctRowLengths );
+}
+
+template< typename Matrix >
+void test_GetRowLength()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 8;
+   const IndexType cols = 7;
+
+   Matrix m( rows, cols );
+
+   EXPECT_EQ( m.getRowLength( 0 ), 2 );
+   EXPECT_EQ( m.getRowLength( 1 ), 3 );
+   EXPECT_EQ( m.getRowLength( 2 ), 3 );
+   EXPECT_EQ( m.getRowLength( 3 ), 3 );
+   EXPECT_EQ( m.getRowLength( 4 ), 3 );
+   EXPECT_EQ( m.getRowLength( 5 ), 3 );
+   EXPECT_EQ( m.getRowLength( 6 ), 2 );
+   EXPECT_EQ( m.getRowLength( 7 ), 1 );
+}
+
+template< typename Matrix >
+void test_GetAllocatedElementsCount()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m( rows, cols );
+
+   EXPECT_EQ( m.getAllocatedElementsCount(), 21 );
+}
+
+template< typename Matrix >
+void test_GetNumberOfNonzeroMatrixElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m( rows, cols );
+
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
+
+   m.setElement( 5, 5, 0);
+
+   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
+}
+
+template< typename Matrix >
+void test_Reset()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  0  0  0  0 \
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    |  0  0  0  0 |
+    *    \  0  0  0  0 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+
+   m.reset();
+
+   EXPECT_EQ( m.getRows(), 0 );
+   EXPECT_EQ( m.getColumns(), 0 );
+}
+
+template< typename Matrix >
+void test_SetValue()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 7x6 matrix:
+    *
+    *    /  0  1  0  0  0  0 \
+    *    |  2  3  4  0  0  0 |
+    *    |  0  5  6  7  0  0 |
+    *    |  0  0  8  9 10  0 |
+    *    |  0  0  0 11 12 13 |
+    *    |  0  0  0  0 14  0 |
+    *    \  0  0  0  0  0 16 /
+    */
+   const IndexType rows = 7;
+   const IndexType cols = 6;
+
+   Matrix m( rows, cols );
+
+   RealType value = 0;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ )
+         m.setElement( i, j, value++ );
+
+   m.setElement( 5, 5, 0);
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  2 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  5 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 3, 3 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 11 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 13 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 14 );
+   EXPECT_EQ( m.getElement( 5, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 16 );
+
+   // Set the values of all elements to a certain number
+   m.setValue( 42 );
+
+   EXPECT_EQ( m.getElement( 0, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 42 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 42 );
+
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 42 );
+}
+
+template< typename Matrix >
+void test_SetElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x5 matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    \  0  0  0 24 25 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) > 1 )
+         {
+            EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error );
+         }
+         else
+            m.setElement( i, j, value++ );
+      }
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+}
+
+template< typename Matrix >
+void test_AddElement()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 6x5 matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+        {
+           if( abs( i - j ) <= 1 )
+               m.setElement( i, j, value );
+           value++;
+        }
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 matrix:
+    *
+    *     /  1  2  0  0  0 \    /  1  2  0  0  0 \   /  3  6  0  0  0 \
+    *     |  6  7  8  0  0 |    |  3  4  5  0  0 |   | 15 18 21  0  0 |
+    * 2 * |  0 12 13 14  0 |  + |  0  6  7  8  0 | = |  0 30 33 36  0 |
+    *     |  0  0 18 19 20 |    |  0  0  9 10 11 |   |  0  0 45 48 51 |
+    *     |  0  0  0 24 25 |    |  0  0  0 12 13 |   |  0  0  0 60 63 |
+    *     \  0  0  0  0 30 /    \  0  0  0  0 14 /   \  0  0  0  0 74 /
+    */
+
+   RealType newValue = 1;
+   RealType multiplicator = 2;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+         if( abs( i - j ) <= 1 )
+            m.addElement( i, j, newValue++, multiplicator );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ), 15 );
+   EXPECT_EQ( m.getElement( 1, 1 ), 18 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 21 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 30 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 33 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 36 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 45  );
+   EXPECT_EQ( m.getElement( 3, 3 ), 48 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 51 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 60 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 63 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 74 );
+}
+
+template< typename Matrix >
+void test_SetRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 3x7 matrix:
+    *
+    *    /  1  2  0  0  0  0  0 \
+    *    |  8  9 10  0  0  0  0 |
+    *    \  0 16 17 18  0  0  0 /
+    */
+   const IndexType rows = 3;
+   const IndexType cols = 7;
+
+   Matrix m( rows, cols );
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 3 ][ 3 ] {
+         {  1,  2,  0 },
+         {  8,  9, 10 },
+         { 16, 17, 18 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 3; i++ )
+      {
+         if( rowIdx == 0 && i > 1 )
+            break;
+         row.setElement( i, values[ rowIdx ][ i ] );
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  9 );
+   EXPECT_EQ( m.getElement( 1, 2 ), 10 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 6 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 16 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 17 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 18 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 6 ),  0 );
+}
+
+template< typename Matrix >
+void test_AddRow()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   /*
+    * Sets up the following 6x5 matrix:
+    *
+    *    /  1  2  0  0  0 \
+    *    |  6  7  8  0  0 |
+    *    |  0 12 13 14  0 |
+    *    |  0  0 18 19 20 |
+    *    |  0  0  0 24 25 |
+    *    \  0  0  0  0 30 /
+    */
+
+   const IndexType rows = 6;
+   const IndexType cols = 5;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   // Check the added elements
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  6 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  7 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  8 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ), 12 );
+   EXPECT_EQ( m.getElement( 2, 2 ), 13 );
+   EXPECT_EQ( m.getElement( 2, 3 ), 14 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ), 18 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 19 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 20 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 24 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 25 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 30 );
+
+   // Add new elements to the old elements with a multiplying factor applied to the old elements.
+   /*
+    * The following setup results in the following 6x5 sparse matrix:
+    *
+    *  / 0  0  0  0  0  0 \   /  1  2  0  0  0 \   / 11 11  0  0  0 \   / 11  11  0   0   0 \
+    *  | 0  1  0  0  0  0 |   |  6  7  8  0  0 |   | 22 22 22  0  0 |   | 28  29 30   0   0 |
+    *  | 0  0  2  0  0  0 | * |  0 12 13 14  0 | + |  0 33 33 33  0 | = |  0  57 59  61   0 |
+    *  | 0  0  0  3  0  0 |   |  0  0 18 19 20 |   |  0  0 44 44 44 |   |  0   0 98 101 104 |
+    *  | 0  0  0  0  4  0 |   |  0  0  0 24 25 |   |  0  0  0 55 55 |   |  0   0  0 151 155 |
+    *  \ 0  0  0  0  0  5 /   \  0  0  0  0 30 /   \  0  0  0  0 66 /   \  0   0  0   0 216 /
+    */
+
+   auto matrix_view = m.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      RealType values[ 6 ][ 3 ] {
+         { 11, 11,  0 },
+         { 22, 22, 22 },
+         { 33, 33, 33 },
+         { 44, 44, 44 },
+         { 55, 55, 55 },
+         { 66, 66, 66 } };
+      auto row = matrix_view.getRow( rowIdx );
+      for( IndexType i = 0; i < 3; i++ )
+      {
+         RealType& val = row.getValue( i );
+         val = rowIdx * val + values[ rowIdx ][ i ];
+      }
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f );
+
+
+   EXPECT_EQ( m.getElement( 0, 0 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  11 );
+   EXPECT_EQ( m.getElement( 0, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 1, 0 ),  28 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  29 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  30 );
+   EXPECT_EQ( m.getElement( 1, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),   0 );
+
+   EXPECT_EQ( m.getElement( 2, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  57 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  59 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  61 );
+   EXPECT_EQ( m.getElement( 2, 4 ),   0  );
+
+   EXPECT_EQ( m.getElement( 3, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  98 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 101 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 104 );
+
+   EXPECT_EQ( m.getElement( 4, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 151 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 155 );
+
+   EXPECT_EQ( m.getElement( 5, 0 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),   0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 216 );
+}
+
+template< typename Matrix >
+void test_VectorProduct()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
+
+   VectorType inVector( 4 );
+   inVector = 2;
+
+   VectorType outVector( 5 );
+   outVector = 0;
+
+   m.vectorProduct( inVector, outVector);
+
+   EXPECT_EQ( outVector.getElement( 0 ),  6 );
+   EXPECT_EQ( outVector.getElement( 1 ), 36 );
+   EXPECT_EQ( outVector.getElement( 2 ), 66 );
+   EXPECT_EQ( outVector.getElement( 3 ), 62 );
+   EXPECT_EQ( outVector.getElement( 4 ), 40 );
+}
+
+template< typename Matrix1, typename Matrix2 = Matrix1 >
+void test_AddMatrix()
+{
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix1 m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   /*
+    * Sets up the following 5x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  3  4  5  0 |
+    *    |  0  6  7  8 |
+    *    |  0  0  9 10 |
+    *    \  0  0  0 11 /
+    */
+   Matrix2 m2( rows, cols );
+
+   RealType newValue = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
+            m2.setElement( i, j, newValue++ );
+
+   /*
+    * Compute the following 5x4 matrix:
+    *
+    *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
+    *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
+    *  |  0 10 11 12 | + 2 * |  0  6  7  8 | =  |  0 22 25 28 |
+    *  |  0  0 15 16 |       |  0  0  9 10 |    |  0  0 33 36 |
+    *  \  0  0  0 20 /       \  0  0  0 11 /    \  0  0  0 42 /
+    */
+
+   Matrix1 mResult;
+   mResult.reset();
+   mResult.setDimensions( rows, cols );
+
+   mResult = m;
+
+   RealType matrixMultiplicator = 2;
+   RealType thisMatrixMultiplicator = 1;
+
+   mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+   EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+   EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+   EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+   EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+   EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+   EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+   EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( mResult.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), 11 );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), 14 );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), 17 );
+   EXPECT_EQ( mResult.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), 22 );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), 25 );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), 28 );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), 33 );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), 36 );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), 42 );
+}
+
+template< typename Matrix >
+void test_GetMatrixProduct()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 5x4 matrix:
+ *
+ *    /  1  2  3  4 \
+ *    |  5  6  7  8 |
+ *    |  9 10 11 12 |
+ *    | 13 14 15 16 |
+ *    \ 17 18 19 20 /
+ */
+    const IndexType leftRows = 5;
+    const IndexType leftCols = 4;
+
+    Matrix leftMatrix;
+    leftMatrix.reset();
+    leftMatrix.setDimensions( leftRows, leftCols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < leftRows; i++ )
+        for( IndexType j = 0; j < leftCols; j++)
+            leftMatrix.setElement( i, j, value++ );
+
+/*
+ * Sets up the following 4x5 matrix:
+ *
+ *    /  1  2  3  4  5 \
+ *    |  6  7  8  9 10 |
+ *    | 11 12 13 14 15 |
+ *    \ 16 17 18 19 20 /
+ */
+    const IndexType rightRows = 4;
+    const IndexType rightCols = 5;
+
+    Matrix rightMatrix;
+    rightMatrix.reset();
+    rightMatrix.setDimensions( rightRows, rightCols );
+
+    RealType newValue = 1;
+    for( IndexType i = 0; i < rightRows; i++ )
+        for( IndexType j = 0; j < rightCols; j++)
+            rightMatrix.setElement( i, j, newValue++ );
+
+/*
+ * Sets up the following 5x5 resulting matrix:
+ *
+ *    /  0  0  0  0 \
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    |  0  0  0  0 |
+ *    \  0  0  0  0 /
+ */
+
+    Matrix mResult;
+    mResult.reset();
+    mResult.setDimensions( leftRows, rightCols );
+    mResult.setValue( 0 );
+
+    RealType leftMatrixMultiplicator = 1;
+    RealType rightMatrixMultiplicator = 2;
+/*
+ *      /  1  2  3  4 \                            /  220  240  260  280  300 \
+ *      |  5  6  7  8 |       /  1  2  3  4  5 \   |  492  544  596  648  700 |
+ *  1 * |  9 10 11 12 | * 2 * |  6  7  8  9 10 | = |  764  848  932 1016 1100 |
+ *      | 13 14 15 16 |       | 11 12 13 14 15 |   | 1036 1152 1268 1384 1500 |
+ *      \ 17 18 19 20 /       \ 16 17 18 19 20 /   \ 1308 1456 1604 1752 1900 /
+ */
+
+    mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator );
+
+    EXPECT_EQ( mResult.getElement( 0, 0 ),  220 );
+    EXPECT_EQ( mResult.getElement( 0, 1 ),  240 );
+    EXPECT_EQ( mResult.getElement( 0, 2 ),  260 );
+    EXPECT_EQ( mResult.getElement( 0, 3 ),  280 );
+    EXPECT_EQ( mResult.getElement( 0, 4 ),  300 );
+
+    EXPECT_EQ( mResult.getElement( 1, 0 ),  492 );
+    EXPECT_EQ( mResult.getElement( 1, 1 ),  544 );
+    EXPECT_EQ( mResult.getElement( 1, 2 ),  596 );
+    EXPECT_EQ( mResult.getElement( 1, 3 ),  648 );
+    EXPECT_EQ( mResult.getElement( 1, 4 ),  700 );
+
+    EXPECT_EQ( mResult.getElement( 2, 0 ),  764 );
+    EXPECT_EQ( mResult.getElement( 2, 1 ),  848 );
+    EXPECT_EQ( mResult.getElement( 2, 2 ),  932 );
+    EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 );
+    EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 );
+
+    EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 );
+    EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 );
+    EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 );
+    EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 );
+    EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 );
+
+    EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 );
+    EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 );
+    EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 );
+    EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 );
+    EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 );
+}
+
+template< typename Matrix >
+void test_GetTransposition()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 3x2 matrix:
+ *
+ *    /  1  2 \
+ *    |  3  4 |
+ *    \  5  6 /
+ */
+    const IndexType rows = 3;
+    const IndexType cols = 2;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    RealType value = 1;
+    for( IndexType i = 0; i < rows; i++ )
+        for( IndexType j = 0; j < cols; j++ )
+            m.setElement( i, j, value++ );
+
+    m.print( std::cout );
+
+/*
+ * Sets up the following 2x3 matrix:
+ *
+ *    /  0  0  0 \
+ *    \  0  0  0 /
+ */
+    Matrix mTransposed;
+    mTransposed.reset();
+    mTransposed.setDimensions( cols, rows );
+
+    mTransposed.print( std::cout );
+
+    RealType matrixMultiplicator = 1;
+
+    mTransposed.getTransposition( m, matrixMultiplicator );
+
+    mTransposed.print( std::cout );
+
+/*
+ * Should result in the following 2x3 matrix:
+ *
+ *    /  1  3  5 \
+ *    \  2  4  6 /
+ */
+
+    EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 );
+    EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 );
+    EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 );
+
+    EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 );
+    EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 );
+    EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 );
+}
+
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+    using RealType = typename Matrix::RealType;
+    using DeviceType = typename Matrix::DeviceType;
+    using IndexType = typename Matrix::IndexType;
+/*
+ * Sets up the following 4x4  matrix:
+ *
+ *    /  4  1  1  1 \
+ *    |  1  4  1  1 |
+ *    |  1  1  4  1 |
+ *    \  1  1  1  4 /
+ */
+    const IndexType rows = 4;
+    const IndexType cols = 4;
+
+    Matrix m;
+    m.reset();
+    m.setDimensions( rows, cols );
+
+    m.setElement( 0, 0, 4.0 );        // 0th row
+    m.setElement( 0, 1, 1.0 );
+    m.setElement( 0, 2, 1.0 );
+    m.setElement( 0, 3, 1.0 );
+
+    m.setElement( 1, 0, 1.0 );        // 1st row
+    m.setElement( 1, 1, 4.0 );
+    m.setElement( 1, 2, 1.0 );
+    m.setElement( 1, 3, 1.0 );
+
+    m.setElement( 2, 0, 1.0 );
+    m.setElement( 2, 1, 1.0 );        // 2nd row
+    m.setElement( 2, 2, 4.0 );
+    m.setElement( 2, 3, 1.0 );
+
+    m.setElement( 3, 0, 1.0 );        // 3rd row
+    m.setElement( 3, 1, 1.0 );
+    m.setElement( 3, 2, 1.0 );
+    m.setElement( 3, 3, 4.0 );
+
+    RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+    RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 };
+
+    IndexType row = 0;
+    RealType omega = 1;
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ],  1.0 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  1.0 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ],  0.15625 );
+    EXPECT_EQ( xVector[ 3 ],  1.0 );
+
+    m.performSORIteration( bVector, row++, xVector, omega);
+
+    EXPECT_EQ( xVector[ 0 ], -0.5 );
+    EXPECT_EQ( xVector[ 1 ], -0.125 );
+    EXPECT_EQ( xVector[ 2 ], 0.15625 );
+    EXPECT_EQ( xVector[ 3 ], 0.3671875 );
+}
+
+template< typename Matrix >
+void test_AssignmentOperator()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+   constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j <  columns; j++ )
+         if( abs( i - j ) <= 1 )
+            hostMatrix.setElement( i, j,  i + j );
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+            if( abs( i - j ) <= 1 )
+               EXPECT_EQ( matrix.getElement( i, j ), i + j );
+            else
+               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( abs( i - j ) <= 1 )
+            cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+      }
+#endif
+}
+
+
+template< typename Matrix >
+void test_SaveAndLoad()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    \  0  0 15 16 /
+    */
+   const IndexType rows = 4;
+   const IndexType cols = 4;
+
+   Matrix savedMatrix( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            savedMatrix.setElement( i, j, value );
+         value++;
+      }
+
+   ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+}
+
+template< typename Matrix >
+void test_Print()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
+
+   Matrix m( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++)
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
+
+   std::stringstream printed;
+   std::stringstream couted;
+
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
+
+   m.print( std::cout ); //all the std::cout goes to ss
+
+   std::cout.rdbuf(old_buf); //reset
+   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
+             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
+             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
+             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
+             "Row: 4 ->  Col:3->20\t\n";
+
+   EXPECT_EQ( printed.str(), couted.str() );
+}
+
+// test fixture for typed tests
+template< typename Matrix >
+class MatrixTest : public ::testing::Test
+{
+protected:
+   using MatrixType = Matrix;
+};
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long >
+#ifdef HAVE_CUDA
+    ,TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long >
+#endif
+>;
+
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
+
+TYPED_TEST( MatrixTest, getSerializationType )
+{
+   test_GetSerializationType();
+}
+
+TYPED_TEST( MatrixTest, setDimensionsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetDimensions< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setLikeTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetLike< MatrixType, MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetCompressedRowLengths< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getRowLengthTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetRowLength< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetAllocatedElementsCount< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, resetTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Reset< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setValueTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetValue< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addElementTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddElement< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, setRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SetRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addRowTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddRow< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, vectorProductTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_VectorProduct< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addMatrixTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AddMatrix< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    using RealType = typename MatrixType::RealType;
+    using DeviceType = typename MatrixType::DeviceType;
+    using IndexType = typename MatrixType::IndexType;
+    using RealAllocatorType = typename MatrixType::RealAllocatorType;
+    using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >;
+
+    test_AddMatrix< MatrixType, MatrixType2 >();
+}
+
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, saveAndLoadTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_SaveAndLoad< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, printTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_Print< MatrixType >();
+}
+
+//// test_getType is not general enough yet. DO NOT TEST IT YET.
+
+//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Host )
+//{
+//    host_test_GetType< Tridiagonal_host_float, Tridiagonal_host_int >();
+//}
+//
+//#ifdef HAVE_CUDA
+//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Cuda )
+//{
+//    cuda_test_GetType< Tridiagonal_cuda_float, Tridiagonal_cuda_int >();
+//}
+//#endif
+
+/*TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Host )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::TridiagonalMatrixProductKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, Tridiagonal_host_int *, const int, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Tridiagonal_host_int, Matrix2=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1315): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Cuda )
+{
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::TridiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Tridiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n";
+    std::cout << "                  instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_cuda_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1332): here\n\n";
+}
+#endif
+
+TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Host )
+{
+//    test_GetTransposition< Tridiagonal_host_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on CPU, this test will not build, but will print the following message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n";
+    std::cout << "AND this message: \n";
+    std::cout << "      /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionNonAlignedKernel\" matches the argument list\n";
+    std::cout << "              argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n";
+    std::cout << "          detected during:\n";
+    std::cout << "              instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n";
+    std::cout << "                  instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n";
+    std::cout << "              /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n";
+}
+
+#ifdef HAVE_CUDA
+TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Cuda )
+{
+//    test_GetTransposition< Tridiagonal_cuda_int >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched on GPU, this test throws the following message: \n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n";
+    std::cout << "      File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n";
+    std::cout << "      Line: 329 \n";
+    std::cout << "      Diagnostics: Not supported with CUDA.\n";
+    std::cout << "  terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n";
+    std::cout << "          what():  CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n";
+    std::cout << "  Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n";
+    std::cout << "  [1]    4003 abort (core dumped)  ./TridiagonalMatrixTest-dbg\n";
+}
+#endif
+
+TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Host )
+{
+    test_PerformSORIteration< Tridiagonal_host_float >();
+}
+
+#ifdef HAVE_CUDA
+TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Cuda )
+{
+//    test_PerformSORIteration< Tridiagonal_cuda_float >();
+    bool testRan = false;
+    EXPECT_TRUE( testRan );
+    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
+    std::cout << "If launched, this test throws the following message: \n";
+    std::cout << "      [1]    6992 segmentation fault (core dumped)  ./SparseMatrixTest-dbg\n\n";
+    std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n";
+}
+#endif
+ * */
+
+#endif // HAVE_GTEST
+
+#include "../main.h"