diff --git a/Documentation/Examples/Matrices/CMakeLists.txt b/Documentation/Examples/Matrices/CMakeLists.txt
index 710524aaa5bcf756e6814352e6be8d5d0dc296af..8e4f5b37d658d74a13ed3d949bca36a25feacb21 100644
--- a/Documentation/Examples/Matrices/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/CMakeLists.txt
@@ -1,161 +1,5 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list_cuda DenseMatrixExample_Constructor_init_list.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out
-                       OUTPUT DenseMatrixExample_Constructor_init_list.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElements_cuda DenseMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements_cuda > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out
-                       OUTPUT DenseMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths_cuda DenseMatrixExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElementsCount_cuda DenseMatrixExample_getElementsCount.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out
-                       OUTPUT DenseMatrixExample_getElementsCount.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getConstRow_cuda DenseMatrixExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out
-                       OUTPUT DenseMatrixExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getRow_cuda DenseMatrixExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out
-                       OUTPUT DenseMatrixExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElement_cuda DenseMatrixExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out
-                       OUTPUT DenseMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_addElement_cuda DenseMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out
-                       OUTPUT DenseMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElement_cuda DenseMatrixExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out
-                       OUTPUT DenseMatrixExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_cuda DenseMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out
-                       OUTPUT DenseMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction_cuda DenseMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out
-                       OUTPUT DenseMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_forRows_cuda DenseMatrixExample_forRows.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out
-                       OUTPUT DenseMatrixExample_forRows.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_forAllRows_cuda DenseMatrixExample_forAllRows.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out
-                       OUTPUT DenseMatrixExample_forAllRows.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out
-                       OUTPUT DenseMatrixViewExample_constructor.out )
-
-ELSE()
-   ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out
-                       OUTPUT DenseMatrixExample_Constructor_init_list.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_setElements DenseMatrixExample_setElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out
-                       OUTPUT DenseMatrixExample_setElements.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths DenseMatrixExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getElementsCount DenseMatrixExample_getElementsCount.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out
-                       OUTPUT DenseMatrixExample_getElementsCount.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getConstRow DenseMatrixExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out
-                       OUTPUT DenseMatrixExample_getConstRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getRow DenseMatrixExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out
-                       OUTPUT DenseMatrixExample_getRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_setElement DenseMatrixExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out
-                       OUTPUT DenseMatrixExample_setElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_addElement DenseMatrixExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out
-                       OUTPUT DenseMatrixExample_addElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getElement DenseMatrixExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out
-                       OUTPUT DenseMatrixExample_getElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_rowsReduction DenseMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out
-                       OUTPUT DenseMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction DenseMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out
-                       OUTPUT DenseMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_forRows DenseMatrixExample_forRows.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out
-                       OUTPUT DenseMatrixExample_forRows.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_forAllRows DenseMatrixExample_forAllRows.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out
-                       OUTPUT DenseMatrixExample_forAllRows.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out
-                       OUTPUT DenseMatrixViewExample_constructor.out )
-
-ENDIF()
-
-ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS
-   DenseMatrixExample_Constructor_init_list.out
-   DenseMatrixExample_setElements.out
-   DenseMatrixExample_getCompressedRowLengths.out
-   DenseMatrixExample_getElementsCount.out
-   DenseMatrixExample_getConstRow.out
-   DenseMatrixExample_getRow.out
-   DenseMatrixExample_setElement.out
-   DenseMatrixExample_addElement.out
-   DenseMatrixExample_getElement.out
-   DenseMatrixExample_rowsReduction.out
-   DenseMatrixExample_allRowsReduction.out
-   DenseMatrixExample_forRows.out
-   DenseMatrixExample_forAllRows.out
-   DenseMatrixViewExample_constructor.out
-)
-
+# Descend into the per-matrix-type example directories, in the order listed;
+# each one is expected to provide its own CMakeLists.txt.
+FOREACH( MATRIX_KIND DenseMatrix LambdaMatrix MultidiagonalMatrix SparseMatrix TridiagonalMatrix )
+   ADD_SUBDIRECTORY( ${MATRIX_KIND} )
+ENDFOREACH()
diff --git a/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..156b19dba5c25b19be76a2c401098084fbcf33c4
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Documentation examples for TNL::Matrices::DenseMatrix and DenseMatrixView.
+#
+# Every example is compiled into its own executable (from the .cu source when
+# BUILD_CUDA is set, from the .cpp source otherwise) and then executed with its
+# standard output redirected into
+# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<example>.out.
+#
+# To add an example, append its base name (source file name without extension)
+# to the list below.
+SET( DENSE_MATRIX_EXAMPLES
+   DenseMatrixExample_Constructor_init_list
+   DenseMatrixExample_setElements
+   DenseMatrixExample_getCompressedRowLengths
+   DenseMatrixExample_getElementsCount
+   DenseMatrixExample_getConstRow
+   DenseMatrixExample_getRow
+   DenseMatrixExample_setElement
+   DenseMatrixExample_addElement
+   DenseMatrixExample_getElement
+   DenseMatrixExample_rowsReduction
+   DenseMatrixExample_allRowsReduction
+   DenseMatrixExample_forRows
+   DenseMatrixExample_forAllRows
+   DenseMatrixViewExample_constructor
+   DenseMatrixViewExample_getCompressedRowLengths
+   DenseMatrixViewExample_getElementsCount
+   DenseMatrixViewExample_getConstRow
+   DenseMatrixViewExample_getRow
+   DenseMatrixViewExample_setElement
+   DenseMatrixViewExample_addElement
+   DenseMatrixViewExample_getElement
+   DenseMatrixViewExample_rowsReduction
+   DenseMatrixViewExample_allRowsReduction
+   DenseMatrixViewExample_forRows
+   DenseMatrixViewExample_forAllRows
+)
+
+# Names of the custom-command outputs, collected for the driver target below.
+SET( DENSE_MATRIX_EXAMPLE_OUTPUTS )
+
+FOREACH( EXAMPLE ${DENSE_MATRIX_EXAMPLES} )
+   # CUDA builds use the .cu source and a "_cuda" suffix on the target name.
+   IF( BUILD_CUDA )
+      SET( EXAMPLE_TARGET ${EXAMPLE}_cuda )
+      CUDA_ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cu )
+   ELSE()
+      SET( EXAMPLE_TARGET ${EXAMPLE} )
+      ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cpp )
+   ENDIF()
+
+   # NOTE(review): the command writes into TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH
+   # while OUTPUT names a file relative to the current binary directory; unless
+   # the two coincide the declared OUTPUT is never created and the example is
+   # re-run on every build. Kept as-is - confirm this is intended.
+   ADD_CUSTOM_COMMAND( COMMAND ${EXAMPLE_TARGET} >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${EXAMPLE}.out
+                       OUTPUT ${EXAMPLE}.out )
+   LIST( APPEND DENSE_MATRIX_EXAMPLE_OUTPUTS ${EXAMPLE}.out )
+ENDFOREACH()
+
+# Part of the default build (ALL): building the project runs every example.
+ADD_CUSTOM_TARGET( RunDenseMatricesExamples ALL DEPENDS
+   ${DENSE_MATRIX_EXAMPLE_OUTPUTS}
+)
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp
similarity index 80%
rename from Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp
index 32e39e6a3ec5fb55618ed54523db22f34ed0ebbd..4a36b007e442a492fad1944f01d8d6cc6c1e3294 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp
@@ -9,13 +9,13 @@ void addElements()
    for( int i = 0; i < 5; i++ )
       matrix.setElement( i, i, i );
 
-   std::cout << "Initial matrix is: " << matrix << std::endl;
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
 
    for( int i = 0; i < 5; i++ )
       for( int j = 0; j < 5; j++ )
          matrix.addElement( i, j, 1.0, 5.0 );
 
-   std::cout << "Matrix after addition is: " << matrix << std::endl;
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_addElement.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
similarity index 94%
rename from Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
index 5fddf0f34ff789591802da01c143c3600baeadf3..e218db69022f8f4b43df7d6956fbc6afb0cfde00 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
@@ -8,7 +8,7 @@ void forAllRowsExample()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
 
-   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) {
+   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
       if( rowIdx < columnIdx )
          compute = false;
       else
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
similarity index 94%
rename from Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
index f3e45a006adbe3f73ec5f37b89afc7f0aed81cce..f98c580fdf36ff6c2d0a13d12f35d4128970310a 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
@@ -8,7 +8,7 @@ void forRowsExample()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
 
-   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) {
+   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
       if( rowIdx < columnIdx )
          compute = false;
       else
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_forRows.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp
similarity index 89%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp
index 72a5d0af44682b84c8e503c5bcc561666eb43088..7bd089ed8ade872dc6001ba97fec05659e95e2f5 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp
@@ -17,7 +17,7 @@ void getElements()
    for( int i = 0; i < 5; i++ )
    {
       for( int j = 0; j < 5; j++ )
-         std::cout << std::setw( 5 ) << std::ios::right << matrix.getElement( i, i );
+         std::cout << std::setw( 5 ) << matrix.getElement( i, j );
       std::cout << std::endl;
    }
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getElement.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp
similarity index 94%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp
index a95fa00e77e9f6b40de672a21931005c17862549..0a4a7bb7bb30ae92cb3464a4e8db8a9de1940fe4 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp
@@ -14,7 +14,7 @@ void getElementsCountExample()
       { 11, 12, 13, 14, 15 }
    };
 
-   std::cout << "Matrix elements count is " << triangularMatrix.getElementsCount() << "." << std::endl;
+   std::cout << "Matrix elements count is " << triangularMatrix.getAllocatedElementsCount() << "." << std::endl;
    std::cout << "Non-zero matrix elements count is " << triangularMatrix.getNonzeroElementsCount() << "." << std::endl;
 }
 
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu
new file mode 120000
index 0000000000000000000000000000000000000000..045fa3c1b11ffaf2bcad06b46462823230cf80ac
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu
@@ -0,0 +1 @@
+DenseMatrixExample_getNonzeroElementsCount.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
similarity index 95%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
index 00a6b11192a5f7fdedfc5964db674ed5fc4c05b7..0cf1e1d76d938a32b95b8702d659d47025851998 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
@@ -13,7 +13,7 @@ void getRowExample()
 
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
       auto row = matrix->getRow( rowIdx );
-      row.setElement( rowIdx, 10* ( rowIdx + 1 ) );
+      row.setElement( rowIdx, 10 * ( rowIdx + 1 ) );
    };
 
    /***
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_getRow.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b521d15d26cc789c6f3f8b6c32ead723d35ac1f4
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
@@ -0,0 +1,65 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   TNL::Matrices::DenseMatrix< double, Device > matrix {
+      {  1,  0,  0,  0,  0 },
+      {  1,  2,  0,  0,  0 },
+      {  0,  1,  8,  0,  0 },
+      {  0,  0,  1,  9,  0 },
+      {  0,  0,  0,  0,  1 } };
+
+   const auto rowsCount = matrix.getRows();
+
+   /***
+    * Vector collecting one result per matrix row, together with its view,
+    * which is the object the keep lambda below captures.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( rowsCount );
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch: turn a matrix element into its absolute value.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce: combine two fetched values by taking their maximum.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep: write the reduced value of the row rowIdx into the result view.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Run the reduction, passing 0 and the number of rows as the row range.
+    * The last argument is presumably the initial value of the reduction (confirm in the rowsReduction docs).
+    */
+   matrix.rowsReduction( 0, rowsCount, fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp
similarity index 82%
rename from Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp
index 4f92496f83fab014758a616aee69fa50497b0037..9441cc60d8418030bf9cd6951483a726532d85f0 100644
--- a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp
@@ -2,6 +2,7 @@
 #include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Matrices/DenseMatrix.h>
 #include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
 #include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Pointers/SmartPointersRegister.h>
 
@@ -18,12 +19,17 @@ void setElements()
    auto f = [=] __cuda_callable__ ( int i ) mutable {
       matrix->setElement( i, i, -i );
    };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use DenseMatrixView. See
+    * DenseMatrixView::getRow example for details.
+    */
    TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
    TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f );
 
    std::cout << "Matrix set from its native device:" << std::endl;
    std::cout << *matrix << std::endl;
-
 }
 
 int main( int argc, char* argv[] )
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_setElement.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_setElements.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElements.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_setElements.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc72a6241037545c88d675a4d08d32f375a9a8d6
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void addElements()
+{
+   constexpr int size = 5;
+   TNL::Matrices::DenseMatrix< double, Device > matrix( size, size );
+   auto matrixView = matrix.getView();
+   // Put 0, 1, ..., 4 on the diagonal through the view.
+   for( int diag = 0; diag < size; ++diag )
+      matrixView.setElement( diag, diag, diag );
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   // Call addElement( row, column, 1.0, 5.0 ) on every element of the matrix.
+   for( int row = 0; row < size; ++row )
+      for( int column = 0; column < size; ++column )
+         matrixView.addElement( row, column, 1.0, 5.0 );
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..9266b5e73d4f3a375a42070b53544e30d6ff6ded
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
similarity index 56%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
index e8c036fa4d325bfd2eab7144eebae7337fc23ad5..d180caa518bbaaf05d3f245006c5a58637196919 100644
--- a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
@@ -12,19 +12,19 @@ void createMatrixView()
       5,  6,  7,  8,
       9, 10, 11, 12 };
 
-   TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > matrix( 5, 5, values.getView() );
-
    /***
-    * We need a matrix view to pass the matrix to lambda function even on CUDA device.
+    * Create dense matrix view with row major order
     */
-   /*auto matrixView = matrix.getView();
-   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
-      auto row = matrixView.getRow( rowIdx );
-      row.setElement( rowIdx, 10* ( rowIdx + 1 ) );
-   };
+   TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > rowMajorMatrix( 3, 4, values.getView() );
+   std::cout << "Row major order matrix:" << std::endl;
+   std::cout << rowMajorMatrix << std::endl;
 
-   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f );
-   std::cout << matrix << std::endl;*/
+   /***
+    * Create dense matrix view with column major order
+    */
+   TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::ColumnMajorOrder > columnMajorMatrix( 4, 3, values.getView() );
+   std::cout << "Column major order matrix:" << std::endl;
+   std::cout << columnMajorMatrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cu
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cu
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3c51e8ee5cb697af4de70f217e833001d852ab73
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forAllRowsExample()
+{
+   TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
+   auto matrixView = matrix.getView();
+   // Elements above the diagonal only set compute to false; all others get rowIdx + columnIdx.
+   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
+      if( columnIdx > rowIdx ) {
+         compute = false;
+         return;
+      }
+      value = rowIdx + columnIdx;
+   };
+   matrixView.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6b0114a09af2b8cdf504f518df9173935a71054b
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..810bf11186d26c707ce6138beac7467c5b44c97b
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
+   auto matrixView = matrix.getView();
+   // Elements above the diagonal only set compute to false; all others get rowIdx + columnIdx.
+   auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) {
+      if( columnIdx > rowIdx ) {
+         compute = false;
+         return;
+      }
+      value = rowIdx + columnIdx;
+   };
+   matrixView.forRows( 0, matrix.getRows(), f );
+   std::cout << matrix << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..8111505a3bafe0c6aaad3434405418d628efeb90
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4572f41a38104aab7297d99a7e65920d351c7dfe
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void getCompressedRowLengthsExample()
+{
+   // The initializer list gives the rows with 1 to 5 entries each.
+   TNL::Matrices::DenseMatrix< double, Device > triangularMatrix {
+      {  1 },
+      {  2,  3 },
+      {  4,  5,  6 },
+      {  7,  8,  9, 10 },
+      { 11, 12, 13, 14, 15 }
+   };
+   auto view = triangularMatrix.getConstView();
+   std::cout << view << std::endl;
+
+   // Let the constant view fill the vector with the compressed row lengths.
+   TNL::Containers::Vector< int, Device > lengths;
+   view.getCompressedRowLengths( lengths );
+   std::cout << "Compressed row lengths are: " << lengths << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting compressed row lengths on host: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Getting compressed row lengths on CUDA device: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..d9f9713a8c6ec97306ca86d804bcd0eeae57105b
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_getConstRow.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..facd73fa1735401206d57fc286d9c1f6851ef7b4
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f4be000b903246e206963038d125a009bbbd568
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void getElements()
+{
+   TNL::Matrices::DenseMatrix< double, Device > matrix {
+      {  1,  0,  0,  0,  0 },
+      { -1,  2, -1,  0,  0 },
+      {  0, -1,  2, -1,  0 },
+      {  0,  0, -1,  2, -1 },
+      {  0,  0,  0,  0,  1 } };
+   auto matrixView = matrix.getConstView();
+
+   // Print element ( i, j ) of the constant view in a field of width 5, one matrix row per line.
+   for( int i = 0; i < 5; i++ )
+   {
+      for( int j = 0; j < 5; j++ )
+         std::cout << std::setw( 5 ) << matrixView.getElement( i, j );
+      std::cout << std::endl;
+   }
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..e2a308ee52189892fb9dbc64425a69854122fba8
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..04566eb27ba424040954a55e5e6ddc30859467b2
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void getElementsCountExample()
+{
+   TNL::Matrices::DenseMatrix< double, Device > triangularMatrix {
+      {  1 },
+      {  2,  3 },
+      {  4,  5,  6 },
+      {  7,  8,  9, 10 },
+      { 11, 12, 13, 14, 15 }
+   };
+   auto view = triangularMatrix.getConstView();
+   // Query both counters through the constant view and print them.
+   std::cout << "Matrix elements count is " << view.getAllocatedElementsCount() << "." << std::endl;
+   std::cout << "Non-zero matrix elements count is " << view.getNonzeroElementsCount() << "." << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Computing matrix elements on host: " << std::endl;
+   getElementsCountExample< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Computing matrix elements on CUDA device: " << std::endl;
+   getElementsCountExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu
new file mode 120000
index 0000000000000000000000000000000000000000..b5d2bcebd60b9ff56fbe83a7095864dd10cff83c
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_getElementsCount.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
similarity index 81%
rename from Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
index 30d893bc18ebc27f35c55566b900cb506084122c..8142599c14c0aa6f810f55ba3ff0d24e21c761de 100644
--- a/Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
@@ -10,14 +10,19 @@ void getRowExample()
    TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
 
    /***
-    * We need a matrix view to pass the matrix to lambda function even on CUDA device.
+    * Create dense matrix view which can be captured by the following lambda
+    * function.
     */
    auto matrixView = matrix.getView();
+
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
       auto row = matrixView.getRow( rowIdx );
-      row.setElement( rowIdx, 10* ( rowIdx + 1 ) );
+      row.setElement( rowIdx, 10 * ( rowIdx + 1 ) );
    };
 
+   /***
+    * Set the matrix elements.
+    */
    TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f );
    std::cout << matrix << std::endl;
 }
@@ -31,4 +36,4 @@ int main( int argc, char* argv[] )
    std::cout << "Getting matrix rows on CUDA device: " << std::endl;
    getRowExample< TNL::Devices::Cuda >();
 #endif
-}
\ No newline at end of file
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5af514505570549792f25111233876e7e7db3816
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
similarity index 100%
rename from Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4170aaa628a8965768169b1da468517430143990
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92985bc5aafc465277d2c571a20d7f64391d6357
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void setElements()
+{
+   constexpr int size = 5;
+   TNL::Matrices::DenseMatrix< double, Device > matrix( size, size );
+   auto matrixView = matrix.getView();
+
+   // Diagonal set element by element from a plain loop.
+   for( int diag = 0; diag < size; ++diag )
+      matrixView.setElement( diag, diag, diag );
+   std::cout << "Matrix set from the host:" << std::endl << matrix << std::endl;
+
+   // Same diagonal, negated, set from a lambda executed by ParallelFor on Device.
+   auto setNegatedDiagonal = [=] __cuda_callable__ ( int idx ) mutable {
+      matrixView.setElement( idx, idx, -idx );
+   };
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, size, setNegatedDiagonal );
+   std::cout << "Matrix set from its native device:" << std::endl << matrix << std::endl;
+}
+
+// Runs the example on the host and, when HAVE_CUDA is defined, on the CUDA device as well.
+int main( int argc, char* argv[] )
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >();
+#ifdef HAVE_CUDA
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..902de92255413ee4789d85480658bb84abb8dda5
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6315309b2362e822e35569316331e819e115566d
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt
@@ -0,0 +1,69 @@
+ADD_EXECUTABLE( LambdaMatrixExample_Constructor LambdaMatrixExample_Constructor.cpp )   # these three examples are built from .cpp regardless of BUILD_CUDA
+ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Constructor >
+                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Constructor.out
+                    OUTPUT LambdaMatrixExample_Constructor.out )   # runs the example, redirecting its output into the snippets directory
+
+ADD_EXECUTABLE( LambdaMatrixExample_getCompressedRowLengths LambdaMatrixExample_getCompressedRowLengths.cpp )
+ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getCompressedRowLengths >
+                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getCompressedRowLengths.out
+                    OUTPUT LambdaMatrixExample_getCompressedRowLengths.out )   # NOTE(review): OUTPUT is a bare file name while the file is written under the snippets path - confirm this is intended
+
+ADD_EXECUTABLE( LambdaMatrixExample_getNonzeroElementsCount LambdaMatrixExample_getNonzeroElementsCount.cpp )
+ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getNonzeroElementsCount >
+                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getNonzeroElementsCount.out
+                    OUTPUT LambdaMatrixExample_getNonzeroElementsCount.out )
+
+
+IF( BUILD_CUDA )   # CUDA build: compile the .cu sources with CUDA_ADD_EXECUTABLE into *_cuda targets
+   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction_cuda LambdaMatrixExample_rowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
+                       OUTPUT LambdaMatrixExample_rowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction_cuda LambdaMatrixExample_allRowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out
+                       OUTPUT LambdaMatrixExample_allRowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forRows_cuda LambdaMatrixExample_forRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forRows.out
+                       OUTPUT LambdaMatrixExample_forRows.out )
+
+   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forAllRows_cuda LambdaMatrixExample_forAllRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forAllRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forAllRows.out
+                       OUTPUT LambdaMatrixExample_forAllRows.out )
+
+ELSE()   # otherwise: build the same four examples from their .cpp sources with ADD_EXECUTABLE
+   ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction LambdaMatrixExample_rowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
+                       OUTPUT LambdaMatrixExample_rowsReduction.out )
+
+   ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction LambdaMatrixExample_allRowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out
+                       OUTPUT LambdaMatrixExample_allRowsReduction.out )
+
+   ADD_EXECUTABLE( LambdaMatrixExample_forRows LambdaMatrixExample_forRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forRows >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forRows.out
+                       OUTPUT LambdaMatrixExample_forRows.out )
+
+   ADD_EXECUTABLE( LambdaMatrixExample_forAllRows LambdaMatrixExample_forAllRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forAllRows >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forAllRows.out
+                       OUTPUT LambdaMatrixExample_forAllRows.out )
+ENDIF()   # both branches declare the same *.out OUTPUT names, which the custom target in this file depends on
+
+ADD_CUSTOM_TARGET( RunLambdaMatricesExamples ALL DEPENDS   # part of the default build (ALL); depends on every *.out declared in this file
+   LambdaMatrixExample_Constructor.out
+   LambdaMatrixExample_getCompressedRowLengths.out
+   LambdaMatrixExample_getNonzeroElementsCount.out
+   LambdaMatrixExample_rowsReduction.out
+   LambdaMatrixExample_allRowsReduction.out
+   LambdaMatrixExample_forRows.out
+   LambdaMatrixExample_forAllRows.out
+)
+
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2c418dd5407db95fa75a3ea49c9664db4be19fa2
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Matrices/LambdaMatrix.h>
+
+int main( int argc, char* argv[] )
+{
+   /***
+    * Lambdas defining the matrices: one element per row, placed on the diagonal.
+    */
+   auto oneEntryPerRow = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return 1; };
+   auto unitDiagonal = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      value = 1.0;
+      columnIdx = rowIdx;
+   };
+   auto indexDiagonal = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      value = rowIdx;
+      columnIdx = rowIdx;
+   };
+   using Factory = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >;
+   const int dim = 5;
+
+   /***
+    * First matrix: the type is spelled out via decltype and the dimensions go to the constructor.
+    */
+   using MatrixType = decltype( Factory::create( unitDiagonal, oneEntryPerRow ) );
+   MatrixType first( dim, dim, unitDiagonal, oneEntryPerRow );
+
+   /***
+    * Second matrix: the type is deduced with 'auto' and the dimensions are set afterwards.
+    */
+   auto second = Factory::create( indexDiagonal, oneEntryPerRow );
+   second.setDimensions( dim, dim );
+
+   std::cout << "The first lambda matrix: " << std::endl << first << std::endl;
+   std::cout << "The second lambda matrix: " << std::endl << second << std::endl;
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a39a1f33b339aeeb9420612009bcd0080ffafb44
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Builds a 5x5 lambda matrix on Device, computes the largest absolute value of
+// every row with allRowsReduction() and prints the matrix and the row maxima.
+template< typename Device >
+void allRowsReduction()
+{
+   /***
+    * Lambda functions defining the matrix.
+    */
+   auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+         columnIdx = localIdx;
+         value = TNL::max( rowIdx - columnIdx + 1, 0 );
+   };
+
+   using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >;
+   auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths );
+
+   /***
+    * Vector for the largest element of each row, and its view; the view is
+    * what the keep lambda below captures by value.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the given values; neither operand is modified.
+    */
+   auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the largest value of each row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest value in each row; lowest() is presumably the initial value of the reduction.
+    * NOTE(review): std::numeric_limits is declared in <limits>, which this file does not include directly.
+    */
+   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs allRowsReduction() for the host and, when HAVE_CUDA is defined, for the CUDA device
+{
+   std::cout << "All rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA   // the TNL::Devices::Cuda instantiation is compiled only when HAVE_CUDA is defined
+   std::cout << "All rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f76fba15c56a9d0c0f26f605c6745b2e3cd28da3
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..72ff9610141cdcc702bfc25128d37fbff2eec423
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp
@@ -0,0 +1,43 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Copies every element of a 5x5 lambda matrix into a dense matrix using forAllRows().
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * The lambda matrix: each row has 'columns' elements with value max( rowIdx - columnIdx + 1, 0 ).
+    */
+   auto fullRows = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto entries = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      columnIdx = localIdx;
+      value = TNL::max( rowIdx - localIdx + 1, 0 );
+   };
+
+   auto lambdaMatrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( 5, 5, entries, fullRows );
+
+   TNL::Matrices::DenseMatrix< double, Device > denseCopy( 5, 5 );
+   auto denseCopyView = denseCopy.getView();
+
+   auto copyElement = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double value, bool& compute ) mutable {
+      denseCopyView.setElement( rowIdx, columnIdx, value );
+   };
+
+   lambdaMatrix.forAllRows( copyElement );
+   std::cout << "Original lambda matrix:" << std::endl << lambdaMatrix << std::endl;
+   std::cout << "Dense matrix:" << std::endl << denseCopy << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs forRowsExample() for the host and, when HAVE_CUDA is defined, for the CUDA device
+{
+   std::cout << "Copying matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA   // the TNL::Devices::Cuda instantiation is compiled only when HAVE_CUDA is defined
+   std::cout << "Copying matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..fef2d377766da09f511f8678ad4bc5fa9050a44d
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5cf660a6297bc453b241d8b231942d9fa55c258
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
@@ -0,0 +1,43 @@
+#include <iostream>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Copies rows [0, getRows()) of a 5x5 lambda matrix into a dense matrix using forRows().
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * The lambda matrix: each row has 'columns' elements with value max( rowIdx - columnIdx + 1, 0 ).
+    */
+   auto fullRows = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto entries = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      columnIdx = localIdx;
+      value = TNL::max( rowIdx - localIdx + 1, 0 );
+   };
+
+   auto lambdaMatrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create( 5, 5, entries, fullRows );
+
+   TNL::Matrices::DenseMatrix< double, Device > denseCopy( 5, 5 );
+   auto denseCopyView = denseCopy.getView();
+
+   auto copyElement = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double value, bool& compute ) mutable {
+      denseCopyView.setElement( rowIdx, columnIdx, value );
+   };
+
+   lambdaMatrix.forRows( 0, lambdaMatrix.getRows(), copyElement );
+   std::cout << "Original lambda matrix:" << std::endl << lambdaMatrix << std::endl;
+   std::cout << "Dense matrix:" << std::endl << denseCopy << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs forRowsExample() for the host and, when HAVE_CUDA is defined, for the CUDA device
+{
+   std::cout << "Copying matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA   // the TNL::Devices::Cuda instantiation is compiled only when HAVE_CUDA is defined
+   std::cout << "Copying matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6df275619c15af4f43617de7d068083cf4028590
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..797a0bbbe7c050f71a5cdb2cdbcbdfe1b8423d66
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp
@@ -0,0 +1,23 @@
+#include <iostream>
+#include <TNL/Matrices/LambdaMatrix.h>
+
+int main( int argc, char* argv[] )
+{
+   /***
+    * The lambda matrix: each row has 'columns' elements with value max( rowIdx - columnIdx + 1, 0 ).
+    */
+   auto fullRows = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto entries = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      columnIdx = localIdx;
+      value = TNL::max( rowIdx - localIdx + 1, 0 );
+   };
+   using Factory = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >;
+   const int dim = 5;
+   auto lambdaMatrix = Factory::create( dim, dim, entries, fullRows );
+
+   TNL::Containers::Vector< int > compressedLengths;
+   lambdaMatrix.getCompressedRowLengths( compressedLengths );
+
+   std::cout << "Matrix looks as:" << std::endl << lambdaMatrix << std::endl;
+   std::cout << "Compressed row lengths are: " << compressedLengths << std::endl;
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..03568be58ee58bb4f560ad98ed60f8d63afd4cad
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..43f015c48871995e450d9f5e953f4c9a411ecbf7
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp
@@ -0,0 +1,20 @@
+#include <iostream>
+#include <TNL/Matrices/LambdaMatrix.h>
+
+int main( int argc, char* argv[] )
+{
+   /***
+    * The lambda matrix: each row has 'columns' elements with value max( rowIdx - columnIdx + 1, 0 ).
+    */
+   auto fullRows = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto entries = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+      columnIdx = localIdx;
+      value = TNL::max( rowIdx - localIdx + 1, 0 );
+   };
+   using Factory = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >;
+   const int dim = 5;
+   auto lambdaMatrix = Factory::create( dim, dim, entries, fullRows );
+
+   std::cout << "Matrix looks as:" << std::endl << lambdaMatrix << std::endl;
+   std::cout << "Non-zero elements count is: " << lambdaMatrix.getNonzeroElementsCount() << std::endl;
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..17f3ace0d64ec2f95b4c20f28ec2609c2a36f3f7
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Builds a 5x5 lambda matrix on Device, computes the largest absolute value of
+// every row with rowsReduction() and prints the matrix and the row maxima.
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Lambda functions defining the matrix.
+    */
+   auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; };
+   auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) {
+         columnIdx = localIdx;
+         value = TNL::max( rowIdx - columnIdx + 1, 0 );
+   };
+
+   using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >;
+   auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths );
+
+   /***
+    * Vector for the largest element of each row, and its view; the view is
+    * what the keep lambda below captures by value.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the given values; neither operand is modified.
+    */
+   auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the largest value of each row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest value in rows [0, getRows()); lowest() is presumably the initial value of the reduction.
+    * NOTE(review): std::numeric_limits is declared in <limits>, which this file does not include directly.
+    */
+   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs rowsReduction() for the host and, when HAVE_CUDA is defined, for the CUDA device
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA   // the TNL::Devices::Cuda instantiation is compiled only when HAVE_CUDA is defined
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ecb2401ec9f6a98146b2320d6e6d21de4580bd07
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..10a1ed7329f139deb785a313189abc39b092e02d
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt
@@ -0,0 +1,71 @@
+# Examples built for the active backend: from the .cu sources when BUILD_CUDA
+# is set, from the .cpp sources otherwise.
+SET( MULTIDIAGONAL_MATRIX_EXAMPLES
+   MultidiagonalMatrixExample_Constructor
+   MultidiagonalMatrixExample_Constructor_init_list_1
+   MultidiagonalMatrixExample_Constructor_init_list_2
+   MultidiagonalMatrixExample_getSerializationType
+   MultidiagonalMatrixExample_setElements
+   MultidiagonalMatrixExample_getCompressedRowLengths
+   MultidiagonalMatrixExample_getConstRow
+   MultidiagonalMatrixExample_getRow
+   MultidiagonalMatrixExample_addElement
+   MultidiagonalMatrixExample_getElement
+   MultidiagonalMatrixExample_rowsReduction
+   MultidiagonalMatrixExample_allRowsReduction
+   MultidiagonalMatrixExample_forRows
+   MultidiagonalMatrixExample_forAllRows
+   MultidiagonalMatrixViewExample_getCompressedRowLengths
+   MultidiagonalMatrixViewExample_getConstRow
+   MultidiagonalMatrixViewExample_getRow
+   MultidiagonalMatrixViewExample_setElement
+   MultidiagonalMatrixViewExample_addElement
+   MultidiagonalMatrixViewExample_getElement
+   MultidiagonalMatrixViewExample_rowsReduction
+   MultidiagonalMatrixViewExample_allRowsReduction
+   MultidiagonalMatrixViewExample_forRows
+   MultidiagonalMatrixViewExample_forAllRows
+)
+
+# Examples always built from the .cpp sources.
+# MultidiagonalMatrixExample_setElement does not work with nvcc 10.1. Move it
+# to MULTIDIAGONAL_MATRIX_EXAMPLES when it works.
+SET( MULTIDIAGONAL_MATRIX_HOST_ONLY_EXAMPLES
+   MultidiagonalMatrixExample_setElement
+)
+
+# Each custom command runs the example and stores its standard output as a
+# snippet in ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}.
+IF( BUILD_CUDA )
+   FOREACH( example IN LISTS MULTIDIAGONAL_MATRIX_EXAMPLES )
+      CUDA_ADD_EXECUTABLE( ${example}_cuda ${example}.cu )
+      ADD_CUSTOM_COMMAND( COMMAND ${example}_cuda >
+                           ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out
+                          OUTPUT ${example}.out )
+   ENDFOREACH()
+ELSE()
+   FOREACH( example IN LISTS MULTIDIAGONAL_MATRIX_EXAMPLES )
+      ADD_EXECUTABLE( ${example} ${example}.cpp )
+      ADD_CUSTOM_COMMAND( COMMAND ${example} >
+                           ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out
+                          OUTPUT ${example}.out )
+   ENDFOREACH()
+ENDIF()
+
+FOREACH( example IN LISTS MULTIDIAGONAL_MATRIX_HOST_ONLY_EXAMPLES )
+   ADD_EXECUTABLE( ${example} ${example}.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND ${example} >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out
+                       OUTPUT ${example}.out )
+ENDFOREACH()
+
+# Collect the outputs of all the custom commands above.
+SET( MULTIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS )
+FOREACH( example IN LISTS MULTIDIAGONAL_MATRIX_EXAMPLES MULTIDIAGONAL_MATRIX_HOST_ONLY_EXAMPLES )
+   LIST( APPEND MULTIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS ${example}.out )
+ENDFOREACH()
+
+ADD_CUSTOM_TARGET( RunMultidiagonalMatricesExamples ALL DEPENDS
+   ${MULTIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS}
+)
+
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8f8b8139bc172321d2037d931b51652c506581f2
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp
@@ -0,0 +1,50 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void laplaceOperatorMatrix()
+{
+   /***
+    * Set up a matrix representing an approximation of the Laplace operator on
+    * a regular gridSize x gridSize grid using the finite difference method.
+    */
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize; // one matrix row per grid node
+   TNL::Containers::Vector< int, Device > shifts { - gridSize, -1, 0, 1, gridSize }; // offsets of the five diagonals
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( matrixSize, matrixSize, shifts );
+   auto matrixView = matrix.getView(); // the lambda below captures the view by value
+   auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
+      const int elementIdx = j * gridSize + i; // matrix row belonging to grid node ( i, j )
+      auto row = matrixView.getRow( elementIdx );
+      if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 )
+         row.setElement( 2, 1.0 ); // set matrix elements corresponding to boundary grid nodes
+                                   // and Dirichlet boundary conditions, i.e. 1 on the main diagonal
+                                   // which is the third one
+      else
+      {
+         row.setElement( 0, -1.0 ); // set matrix elements corresponding to inner grid nodes, i.e.
+         row.setElement( 1, -1.0 ); // 4 on the main diagonal (the third one) and -1 on the other
+         row.setElement( 2,  4.0 ); // four diagonals
+         row.setElement( 3, -1.0 );
+         row.setElement( 4, -1.0 );
+      }
+   };
+   TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, gridSize, gridSize, f ); // presumably calls f for every 0 <= i, j < gridSize
+
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA // the GPU variant is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7d790b5c9e1a87957e544064912a2d1d3864499a
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_Constructor.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1056ab9c6ae6a2f9b407298b97a9d91e35296e36
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
@@ -0,0 +1,49 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void laplaceOperatorMatrix()
+{
+   /***
+    * Set up a matrix representing an approximation of the Laplace operator on
+    * a regular gridSize x gridSize grid using the finite difference method.
+    */
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize; // one matrix row per grid node
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( matrixSize, matrixSize, { - gridSize, -1, 0, 1, gridSize } ); // the list gives the offsets of the five diagonals
+   auto matrixView = matrix.getView(); // the lambda below captures the view by value
+   auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
+      const int elementIdx = i * gridSize + j; // matrix row belonging to grid node ( i, j )
+      auto row = matrixView.getRow( elementIdx );
+      if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 )
+         row.setElement( 2, 1.0 ); // set matrix elements corresponding to boundary grid nodes
+                                   // and Dirichlet boundary conditions, i.e. 1 on the main diagonal
+                                   // which is the third one
+      else
+      {
+         row.setElement( 0, -1.0 ); // set matrix elements corresponding to inner grid nodes, i.e.
+         row.setElement( 1, -1.0 ); // 4 on the main diagonal (the third one) and -1 on the other
+         row.setElement( 2,  4.0 ); // four diagonals
+         row.setElement( 3, -1.0 );
+         row.setElement( 4, -1.0 );
+      }
+   };
+   TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, gridSize, gridSize, f ); // presumably calls f for every 0 <= i, j < gridSize
+
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA // the GPU variant is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6b3354ef624c1643a1893da638a7122335153d71
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..60bcde7fc84d5c11bf6483729a0d8fbf33114599
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp
@@ -0,0 +1,59 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void createMultidiagonalMatrix()
+{
+   const int matrixSize = 6;
+
+   /***
+    * Set up the following matrix (dots represent zeros):
+    *
+    * /  4 -1  . -1  .  . \
+    * | -1  4 -1  . -1  . |
+    * |  . -1  4 -1  . -1 |
+    * | -1  . -1  4 -1  . |
+    * |  . -1  . -1  4 -1 |
+    * \  .  . -1  . -1  4 /
+    *
+    * The diagonal offsets are { -3, -1, 0, 1, 3 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( 
+      matrixSize, { -3, -1, 0, 1, 3 }, {
+   /***
+    * To set the matrix elements we first extend the diagonals to their full
+    * lengths even outside the matrix (dots represent zeros and the 0 entries
+    * are artificial padding zeros lying outside the matrix):
+    *
+    * 0  .  0 /  4 -1  . -1  .  . \              -> {  0,  0,  4, -1, -1 }
+    * .  0  . | -1  4 -1  . -1  . | .            -> {  0, -1,  4, -1, -1 }
+    * .  .  0 |  . -1  4 -1  . -1 | .  .         -> {  0, -1,  4, -1, -1 }
+    *    .  . | -1  . -1  4 -1  . | 0  .  .      -> { -1, -1,  4, -1,  0 }
+    *       . |  . -1  . -1  4 -1 | .  0  .  .   -> { -1, -1,  4, -1,  0 }
+    *         \  .  . -1  . -1  4 / 0  .  0  . . -> { -1, -1,  4,  0,  0 }
+    *
+    */
+      {  0,  0,  4, -1, -1 },
+      {  0, -1,  4, -1, -1 },
+      {  0, -1,  4, -1, -1 },
+      { -1, -1,  4, -1,  0 },
+      { -1, -1,  4, -1,  0 },
+      { -1, -1,  4,  0,  0 }
+      } );
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating multidiagonal matrix on CPU ... " << std::endl;
+   createMultidiagonalMatrix< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA // the GPU variant is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating multidiagonal matrix on CUDA GPU ... " << std::endl;
+   createMultidiagonalMatrix< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu
new file mode 120000
index 0000000000000000000000000000000000000000..9098df5e3780bd23c761220d2c82770d44a5bcc6
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_Constructor_init_list_2.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a2da4e40cda5cba3d1f1084a0eee869e780d4c24
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void addElements()
+{
+   const int matrixSize( 5 );
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      matrixSize,     // number of rows
+      matrixSize,     // number of columns
+      { -1, 0, 1 } ); // diagonals offsets
+   for( int i = 0; i < matrixSize; i++ )
+      matrix.setElement( i, i, i ); // the main diagonal holds 0, 1, ..., matrixSize - 1
+
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   for( int i = 0; i < matrixSize; i++ )
+   {
+      if( i > 0 ) // the first row has no element left of the main diagonal
+         matrix.addElement( i, i - 1, 1.0, 5.0 );
+      matrix.addElement( i, i, 1.0, 5.0 ); // NOTE(review): presumably element = 5.0 * element + 1.0 - confirm
+      if( i < matrixSize - 1 ) // the last row has no element right of the main diagonal
+         matrix.addElement( i, i + 1, 1.0, 5.0 );
+   }
+
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA // the device variant is compiled only when HAVE_CUDA is defined
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..04b6a5875bc8ee99d2a599ccde619ea4797a9ee0
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4b102c73b680ca9f9c0c63f315b81c91fb24dbbf
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
@@ -0,0 +1,80 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void allRowsReduction()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { -2, -1, 0 },  // diagonals offsets
+      { { 0, 0, 1 },  // matrix elements
+        { 0, 2, 1 }, 
+        { 3, 2, 1 }, 
+        { 3, 2, 1 },
+        { 3, 2, 1 } } );
+
+   /***
+    * Allocate a vector with one entry per matrix row for the row-wise results.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a view of the vector; the view is what the lambdas capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda returns the absolute value of a matrix element (both indexes are unused).
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value it is given for a row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Run the reduction over all rows; the last argument is the lowest finite double.
+    */
+   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..c8659a5f4bc549e90cc8e84a26f41eb5a1d74a2e
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0114acf63d946545d4a05f9015de26c1da65ff2c
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forAllRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      5,               // number of matrix rows
+      5,               // number of matrix columns
+      { -2, -1, 0 } ); // matrix diagonals offsets
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * The 'forAllRows' method iterates only over matrix elements lying on the given
+       * diagonals and so we do not need to check anything. The element value can be
+       * expressed by the 'localIdx' variable, see the following figure:
+       *
+       *                              0  1  2  <- localIdx values
+       *                              -------
+       * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+       *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+       *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+       *      | .  3  2  1  . |  -> { 3, 2, 1 }
+       *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+       * The parameters rowIdx, columnIdx and compute are not used by this lambda.
+       */
+      value = 3 - localIdx;
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..b18e48f2bbac3fd52a1c814f0b90728cc72f1aa1
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..07382c2e3c809fd0a3d583564a656fc812e8e0f6
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      5,               // number of matrix rows
+      5,               // number of matrix columns
+      { -2, -1, 0 } ); // matrix diagonals offsets
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * The 'forRows' method iterates only over matrix elements lying on the given
+       * diagonals and so we do not need to check anything. The element value can be
+       * expressed by the 'localIdx' variable, see the following figure:
+       *
+       *                              0  1  2  <- localIdx values
+       *                              -------
+       * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+       *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+       *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+       *      | .  3  2  1  . |  -> { 3, 2, 1 }
+       *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+       * The parameters rowIdx, columnIdx and compute are not used by this lambda.
+       */
+      value = 3 - localIdx;
+   };
+   matrix.forRows( 0, matrix.getRows(), f ); // row range passed as ( 0, number of rows )
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..aff0dad0c51477bd34514e1e58420365f75faea5
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5617b514d94259a88e7e9c5f0e4b6c7bd2c9e7da
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp
@@ -0,0 +1,51 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void laplaceOperatorMatrix() // builds a five-diagonal matrix for a 4x4 grid and prints its row lengths
+{
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize; // one matrix row per grid node
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( 
+      matrixSize,                     // number of rows
+      matrixSize,                     // number of columns
+   { - gridSize, -1, 0, 1, gridSize } // diagonals offsets
+   );
+   matrix.setElements( {
+         {  0.0,  0.0, 1.0 },  // set matrix elements corresponding to boundary grid nodes
+         {  0.0,  0.0, 1.0 },  // and Dirichlet boundary conditions, i.e. 1 on the main diagonal
+         {  0.0,  0.0, 1.0 },  // which is the third one
+         {  0.0,  0.0, 1.0 },  // (only three of five values are listed; presumably the rest stay zero - TODO confirm)
+         {  0.0,  0.0, 1.0 },
+         { -1.0, -1.0, 4.0, -1.0, -1.0 }, // set matrix elements corresponding to inner grid nodes, i.e. 4 on the main diagonal
+         { -1.0, -1.0, 4.0, -1.0, -1.0 }, //  (the third one) and -1 to the other sub-diagonals
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         { -1.0, -1.0, 4.0, -1.0, -1.0 },
+         { -1.0, -1.0, 4.0, -1.0, -1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 }
+      } );
+   TNL::Containers::Vector< int, Device > rowLengths; // passed to getCompressedRowLengths on the next line
+   matrix.getCompressedRowLengths( rowLengths );
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Compressed row lengths: " << rowLengths << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..b711bdfdfd899ef94de502fe73c659b6ff72caff
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1cbda6be7f792de05607eecafebe938f93868ec8
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp
@@ -0,0 +1,58 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+template< typename Device >
+void getRowExample()
+{
+   const int matrixSize = 5;
+   auto diagonalsOffsets = { -2, -1, 0 };
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< MatrixType > matrix (
+      matrixSize,  // number of matrix rows
+      matrixSize,  // number of matrix columns
+      diagonalsOffsets );
+   matrix->setElements(
+      {  { 0.0, 0.0, 1.0 },
+         { 0.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 } } );
+
+   /***
+    * The fetch lambda returns the main-diagonal element of the given row.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      auto row = matrix->getRow( rowIdx );
+      return row.getValue( 2 ); // get value from subdiagonal with index 2, i.e. the main diagonal
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use MultidiagonalMatrixView. See
+    * MultidiagonalMatrixView::getConstRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   /***
+    * Compute the matrix trace; the sum is kept in double (initial value 0.0) so fractional values are not truncated.
+    */
+   double trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, fetch, 0.0 );
+   std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl;
+   std::cout << "Matrix trace is: " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ec42cc67428b8927ad0212919924e31df4a3b044
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1d7486fb5c249cdceefbff082b256ff0341c84f
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void getElements()
+{
+   // Build a 5x5 matrix with three diagonals directly from its values.
+   const int matrixSize = 5;
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   MatrixType matrix(
+      matrixSize,              // number of matrix columns
+      { -1, 0, 1 },            // matrix diagonals offsets
+      { {  0.0, 2.0, -1.0 },   // matrix elements definition
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0,  0.0 } } );
+
+   // Print every ( row, column ) position of the matrix, one matrix row per output line.
+   for( int rowIdx = 0; rowIdx < matrixSize; ++rowIdx )
+   {
+      for( int columnIdx = 0; columnIdx < matrixSize; ++columnIdx )
+         std::cout << std::setw( 5 ) << matrix.getElement( rowIdx, columnIdx );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6eb5f59c20cd825a25a8ae0b96918755e5f77606
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..856848bd3acb989e5cbcd8c3e66baf0f1f934009
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
@@ -0,0 +1,61 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+template< typename Device >
+void getRowExample()
+{
+   const int matrixSize( 5 );
+   auto diagonalsOffsets = { -1, 0, 1 }; // Variadic templates in SharedPointer
+                                         // constructor do not recognize initializer
+                                         // list so we give it a hint.
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< MatrixType > matrix(
+      matrixSize,  // number of matrix rows
+      matrixSize,  // number of matrix columns
+      diagonalsOffsets );
+
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      // 'auto row = matrix->getRow( rowIdx );' is not accepted by nvcc 10.1, so the
+      // matrix is accessed via modifyData() instead. The result must be bound by reference:
+      // a plain 'auto' would copy the matrix and the row below would modify only the copy.
+      auto& ref = matrix.modifyData();
+      auto row = ref.getRow( rowIdx );
+
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < matrixSize - 1 )  // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use MultidiagonalMatrixView. See
+    * MultidiagonalMatrixView::getRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   /***
+    * Set the matrix elements by running the lambda for each row index.
+    */
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f );
+   std::cout << std::endl << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA
+   // The CUDA run is disabled for now: nvcc 10.1 does not seem to handle lambda
+   // functions properly, it is hard to get this example compiled and it does not
+   // work properly. We will try again with a later version of CUDA.
+   //std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   //getRowExample< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6eca2f44497da66297dec924982041403e63eb58
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a72e90dce2cc97a278f6c91e7c03cf5a5dd1b851
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp
@@ -0,0 +1,23 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+template< typename Device >
+void getSerializationTypeExample() // prints the serialization type of a default-constructed matrix
+{
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix;
+   // Terminate the line so that any following output does not get glued to the type string.
+   std::cout << "Matrix type is: " << matrix.getSerializationType() << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Get serialization type on CPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Get serialization type on CUDA GPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu
new file mode 120000
index 0000000000000000000000000000000000000000..322cb05b89cce1472fdae1a6b93d0043300e30d7
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_getSerializationType.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dc3d4048384c48e85d952d2f35a10ad55a40d491
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
@@ -0,0 +1,80 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { -2, -1, 0 },  // diagonals offsets
+      { { 0, 0, 1 },  // matrix elements
+        { 0, 2, 1 }, 
+        { 3, 2, 1 }, 
+        { 3, 2, 1 },
+        { 3, 2, 1 } } );
+
+   /***
+    * Allocate a vector with one entry per matrix row for the row-wise results.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a view of the vector; the view is what the lambdas capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda returns the absolute value of a matrix element (both indexes are unused).
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value it is given for a row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Run the reduction with the row range ( 0, number of rows ); the last argument is the lowest finite double.
+    */
+   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..adaff28e8a549eeb8b5539535f78e5cc3594f698
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bf28ec22661be19d99fdfb31b48a2f2e44f46285
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+
+template< typename Device >
+void setElements()
+{
+   const int matrixSize( 5 );
+   auto diagonalsOffsets = { -1, 0, 1 }; // offsets of tridiagonal matrix
+   using Matrix = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< Matrix > matrix( matrixSize, matrixSize, diagonalsOffsets );
+   for( int i = 0; i < matrixSize; i++ ) // bound follows matrixSize instead of a hard-coded 5
+      matrix->setElement( i, i, i );
+
+   std::cout << "Matrix set from the host:" << std::endl;
+   std::cout << *matrix << std::endl;
+
+   auto f = [=] __cuda_callable__ ( int i ) mutable {
+      if( i > 0 )
+         matrix->setElement( i, i - 1, 1.0 );   // element below the diagonal
+      matrix->setElement( i, i, -i );           // element on the diagonal
+      if( i < matrixSize - 1 )
+         matrix->setElement( i, i + 1, 1.0 );   // element above the diagonal
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use MultidiagonalMatrixView. See
+    * MultidiagonalMatrixView::getRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f );
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA
+   // The CUDA run is disabled for now: nvcc 10.1 does not seem to handle lambda
+   // functions properly, it is hard to get this example compiled and it does not
+   // work properly. We will try again with a later version of CUDA.
+   //std::cout << "Set elements on CUDA device:" << std::endl;
+   //setElements< TNL::Devices::Cuda >();
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..88a0fa864e713cd8d5c1c27c20f16aa108e2184a
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b43f2e341d53202a664fdb8452e5f1c91cd60b18
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp
@@ -0,0 +1,62 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void createMultidiagonalMatrix()
+{
+   const int matrixSize = 6;
+
+   /***
+    * Setup the following matrix (dots represent zeros):
+    * 
+    * /  4 -1  . -1  .  . \
+    * | -1  4 -1  . -1  . |
+    * |  . -1  4 -1  . -1 |
+    * | -1  . -1  4 -1  . |
+    * |  . -1  . -1  4 -1 |
+    * \  .  . -1  . -1  4 /
+    * 
+    * The diagonals offsets are { -3, -1, 0, 1, 3 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( 
+      matrixSize,             // number of matrix rows
+      matrixSize,             // number of matrix columns
+      { -3, -1, 0, 1, 3 } );  // matrix diagonals offsets
+   matrix.setElements( {
+   /***
+    * To set the matrix elements we first extend the diagonals to their full
+    * lengths even outside the matrix (dots represent zeros and zeros are
+    * artificial zeros used for memory alignment):
+    * 
+    * 0  .  0 /  4 -1  . -1  .  . \              -> {  0,  0,  4, -1, -1 }
+    * .  0  . | -1  4 -1  . -1  . | .            -> {  0, -1,  4, -1, -1 }
+    * .  .  0 |  . -1  4 -1  . -1 | .  .         -> {  0, -1,  4, -1, -1 }
+    *    .  . | -1  . -1  4 -1  . | 0  .  .      -> { -1, -1,  4, -1,  0 }
+    *       . |  . -1  . -1  4 -1 | .  0  .  .   -> { -1, -1,  4, -1,  0 }
+    *         \  .  . -1  . -1  4 / 0  .  0  . . -> { -1, -1,  4,  0,  0 }
+    * 
+    */
+      {  0,  0,  4, -1, -1 },
+      {  0, -1,  4, -1, -1 },
+      {  0, -1,  4, -1, -1 },
+      { -1, -1,  4, -1,  0 },
+      { -1, -1,  4, -1,  0 },
+      { -1, -1,  4,  0,  0 }
+      } );
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // the command-line arguments are not used
+{
+   std::cout << "Create multidiagonal matrix on CPU ... " << std::endl;
+   createMultidiagonalMatrix< TNL::Devices::Host >(); // run the example with Device = Host
+
+#ifdef HAVE_CUDA // the CUDA run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating multidiagonal matrix on CUDA GPU ... " << std::endl;
+   createMultidiagonalMatrix< TNL::Devices::Cuda >(); // run the same example with Device = Cuda
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..b5a31ea141f6fcba723e453926d54692852a7a6e
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_setElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46f92d7a3a60f7f277e34cb36961f31f4b52ec3f
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp
@@ -0,0 +1,40 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device > // Device is forwarded to MultidiagonalMatrix; main instantiates Host and Cuda
+void addElements()
+{
+   const int matrixSize( 5 );
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      matrixSize,     // number of rows
+      matrixSize,     // number of columns
+      { -1, 0, 1 } ); // diagonals offsets: sub-diagonal, main diagonal, super-diagonal
+   auto view = matrix.getView(); // every modification below goes through this view
+   for( int i = 0; i < matrixSize; i++ )
+      view.setElement( i, i, i ); // store the row index on the main diagonal
+
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   for( int i = 0; i < matrixSize; i++ ) // touch the (up to) three tridiagonal positions of each row
+   {
+      if( i > 0 ) // the first row has no element left of the main diagonal
+         view.addElement( i, i - 1, 1.0, 5.0 );
+      view.addElement( i, i, 1.0, 5.0 ); // NOTE(review): presumably new = 5.0 * old + 1.0 -- confirm in the addElement docs
+      if( i < matrixSize - 1 ) // the last row has no element right of the main diagonal
+         view.addElement( i, i + 1, 1.0, 5.0 );
+   }
+
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7d6a43822f99a19983647d1ef40eb46b0f6b3234
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..26dac464cea68636fb61458ab2e86f3ca153ed56
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
@@ -0,0 +1,81 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device > // Device is forwarded to the matrix and to the result vector
+void allRowsReduction()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { -2, -1, 0 },  // diagonals offsets
+      { { 0, 0, 1 },  // matrix elements
+        { 0, 2, 1 },  // second row: the slot of offset -2 is padding
+        { 3, 2, 1 },  // from here on all three diagonals lie inside the matrix
+        { 3, 2, 1 },
+        { 3, 2, 1 } } );
+   auto view = matrix.getView();
+
+   /***
+    * Allocate one entry per matrix row for the largest element of that row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a view of rowMax that the lambdas below capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda returns the absolute value of a matrix element; the indexes are unused.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value it receives for a row into rowMaxView, i.e. into rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Run the reduction over all rows; lowest() is not greater than any double, so it never wins the max.
+    */
+   view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5e14692de8159377e123ed8fec43cd750143b986
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..143aa864c88a16c2100027b1e32524456f67c991
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp
@@ -0,0 +1,57 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device > // Device is forwarded to MultidiagonalMatrix; main instantiates Host and Cuda
+void forAllRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      5,               // number of matrix rows
+      5,               // number of matrix columns
+      { -2, -1, 0 } ); // matrix diagonals offsets
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * 'forAllRows' method iterates only over matrix elements lying on given subdiagonals
+       * and so we do not need to check anything. The element value can be expressed
+       * by the 'localIdx' variable, see the following figure:
+       *
+       *                              0  1  2  <- localIdx values
+       *                              -------
+       * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+       *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+       *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+       *      | .  3  2  1  . |  -> { 3, 2, 1 }
+       *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+       *
+       */
+      value = 3 - localIdx; // localIdx 0, 1, 2 -> values 3, 2, 1; rowIdx, columnIdx and compute are unused
+   };
+   view.forAllRows( f ); // apply f through the view, then print the matrix
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2138ba26b417da638d42e088312ed929aa50ff94
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..23aa067531f46d1581c835ffb97f63cfa2ad65ca
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
@@ -0,0 +1,57 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device > // Device is forwarded to MultidiagonalMatrix; main instantiates Host and Cuda
+void forRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix(
+      5,               // number of matrix rows
+      5,               // number of matrix columns
+      { -2, -1, 0 } ); // matrix diagonals offsets
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * 'forRows' method iterates only over matrix elements lying on given subdiagonals
+       * and so we do not need to check anything. The element value can be expressed
+       * by the 'localIdx' variable, see the following figure:
+       *
+       *                              0  1  2  <- localIdx values
+       *                              -------
+       * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+       *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+       *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+       *      | .  3  2  1  . |  -> { 3, 2, 1 }
+       *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+       *
+       */
+      value = 3 - localIdx; // localIdx 0, 1, 2 -> values 3, 2, 1; rowIdx, columnIdx and compute are unused
+   };
+   view.forRows( 0, matrix.getRows(), f ); // the row range passed here is 0 .. getRows(), i.e. every row
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ec3f1ad70dabbf5aaf7ac170a72b10868d18df6e
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2b366ab3dc6cb84e377150dd0066eb86fc08cb85
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp
@@ -0,0 +1,52 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device > // Device is forwarded to the matrix and to the rowLengths vector
+void laplaceOperatorMatrix()
+{
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize; // one matrix row per grid node, 16 in total
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( 
+      matrixSize,                     // number of rows
+      matrixSize,                     // number of columns
+   { - gridSize, -1, 0, 1, gridSize } // diagonals offsets
+   );
+   matrix.setElements( {
+         {  0.0,  0.0, 1.0 },  // set matrix elements corresponding to boundary grid nodes
+         {  0.0,  0.0, 1.0 },  // and Dirichlet boundary conditions, i.e. 1 on the main diagonal
+         {  0.0,  0.0, 1.0 },  // which is the third one
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         { -1.0, -1.0, 4.0, -1.0, -1.0 }, // set matrix elements corresponding to inner grid nodes, i.e. 4 on the main diagonal
+         { -1.0, -1.0, 4.0, -1.0, -1.0 }, //  (the third one) and -1 to the other sub-diagonals
+         {  0.0,  0.0, 1.0 },  // rows 7 and 8 are boundary rows again
+         {  0.0,  0.0, 1.0 },
+         { -1.0, -1.0, 4.0, -1.0, -1.0 }, // rows 9 and 10 are the remaining two inner rows
+         { -1.0, -1.0, 4.0, -1.0, -1.0 },
+         {  0.0,  0.0, 1.0 },  // rows 11 to 15 are boundary rows
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 },
+         {  0.0,  0.0, 1.0 }
+      } );
+   auto view = matrix.getView();
+   TNL::Containers::Vector< int, Device > rowLengths; // created empty and handed to the view to fill
+   view.getCompressedRowLengths( rowLengths ); // NOTE(review): presumably the count of non-zero elements per row -- confirm
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Compressed row lengths: " << rowLengths << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..77d01be55991c4e8e5a7668698cbda6aba1ef0e6
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..748c9566590058b95881eacee68c4656c8890d7f
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp
@@ -0,0 +1,49 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device > // Device is forwarded to the matrix and to the Reduction algorithm
+void getRowExample()
+{
+   const int matrixSize = 5;
+   auto diagonalsOffsets = { -2, -1, 0 }; // `auto` deduces std::initializer_list< int >
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   MatrixType matrix (
+      matrixSize,           // number of matrix columns
+      diagonalsOffsets,     // offsets of the stored diagonals
+      {  { 0.0, 0.0, 1.0 }, // matrix elements
+         { 0.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 } } );
+   auto view = matrix.getView();
+
+   /***
+    * Fetch lambda function returns diagonal element in each row.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      auto row = view.getRow( rowIdx );
+      return row.getValue( 2 ); // get value from subdiagonal with index 2, i.e. the main diagonal
+   };
+
+   /***
+    * Compute the matrix trace in double (hence the 0.0) so that it matches the matrix element type.
+    */
+   double trace = TNL::Algorithms::Reduction< Device >::reduce( matrix.getRows(), std::plus<>{}, fetch, 0.0 );
+   std::cout << "Matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Matrix trace is: " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..bb31b7a5dd4c7109ff7d27ed0322ad04526b0b27
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bf12266623a56d9859856bad1aa2342637ccd4f9
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device > // Device is forwarded to MultidiagonalMatrix; main instantiates Host and Cuda
+void getElements()
+{
+   const int matrixSize( 5 );
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
+      matrixSize,   // number of matrix columns
+      { -1, 0, 1 }, // matrix diagonals offsets
+      {             // matrix elements definition
+         {  0.0, 2.0, -1.0 },  // first row: the slot of offset -1 falls outside the matrix
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0,  0.0 }   // last row: the slot of offset +1 falls outside the matrix
+      } );
+   auto view = matrix.getView();
+
+   for( int i = 0; i < matrixSize; i++ ) // print the matrix as a full 5 x 5 table, one row per line
+   {
+      for( int j = 0; j < matrixSize; j++ ) // getElement is queried for every (i, j), also off the three diagonals
+         std::cout << std::setw( 5 ) << view.getElement( i, j );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a246e2dd9284769702131e24ffd4ee2d4d8eb507
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ac322f9aa61ccece6b0f5da4e6d911b29d0d48b7
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp
@@ -0,0 +1,46 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device > // Device is forwarded to the matrix and to ParallelFor
+void getRowExample()
+{
+   const int matrixSize( 5 );
+   auto diagonalsOffsets = { -1, 0, 1 }; // Offsets of the sub-diagonal, the main
+                                         // diagonal and the super-diagonal; `auto`
+                                         // deduces std::initializer_list< int > here.
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   MatrixType matrix(
+      matrixSize,  // number of matrix rows
+      matrixSize,  // number of matrix columns
+      diagonalsOffsets );
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { // the view is captured by value
+      auto row = view.getRow( rowIdx );
+      if( rowIdx > 0 )               // the first row has nothing below the diagonal
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < matrixSize - 1 )  // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+
+   /***
+    * Set the matrix elements by calling f for the row indexes 0 .. getRows().
+    */
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f );
+   std::cout << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2ac03ebc3ff65d20df1530e52bf960584299ed3d
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6793d8bb29969118bd8bcce954d09364db730618
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
@@ -0,0 +1,81 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device > // Device is forwarded to the matrix and to the result vector
+void rowsReduction()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
+    *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
+    *      | 3  2  1  .  . |  -> { 3, 2, 1 }
+    *      | .  3  2  1  . |  -> { 3, 2, 1 }
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
+    * The diagonals offsets are { -2, -1, 0 }.
+    */
+   TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { -2, -1, 0 },  // diagonals offsets
+      { { 0, 0, 1 },  // matrix elements
+        { 0, 2, 1 },  // second row: the slot of offset -2 is padding
+        { 3, 2, 1 },  // from here on all three diagonals lie inside the matrix
+        { 3, 2, 1 },
+        { 3, 2, 1 } } );
+   auto view = matrix.getView();
+
+   /***
+    * Allocate one entry per matrix row for the largest element of that row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a view of rowMax that the lambdas below capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda returns the absolute value of a matrix element; the indexes are unused.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value it receives for a row into rowMaxView, i.e. into rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Reduce the rows 0 .. getRows(); lowest() is not greater than any double, so it never wins the max.
+    */
+   view.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..44df3d3ed38482b4495dcf4aa49f24fc09955650
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41f93667009eece80c94dc5bbe1433c2dd460aeb
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp
@@ -0,0 +1,47 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+
+template< typename Device > // Device is forwarded to the matrix and to ParallelFor
+void setElements()
+{
+   const int matrixSize( 5 );
+   auto diagonalsOffsets = { -1, 0, 1 }; // offsets of tridiagonal matrix
+   using Matrix = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   Matrix matrix( matrixSize, matrixSize, diagonalsOffsets );
+   auto view = matrix.getView();
+
+   for( int i = 0; i < matrixSize; i++ ) // plain host loop: put the row index on the main diagonal
+      view.setElement( i, i, i );
+
+   std::cout << "Matrix set from the host:" << std::endl;
+   std::cout << matrix << std::endl;
+
+   auto f = [=] __cuda_callable__ ( int i ) mutable { // the view is captured by value
+      if( i > 0 ) // the first row has no element left of the main diagonal
+         view.setElement( i, i - 1, 1.0 );
+      view.setElement( i, i, -i ); // overwrite the diagonal value set by the loop above
+      if( i < matrixSize - 1 ) // the last row has no element right of the main diagonal
+         view.setElement( i, i + 1, 1.0 );
+   };
+
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); // call f for i = 0 .. matrixSize
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] ) // argc and argv are not used by this example
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >(); // run the example instantiated for the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >(); // compiled only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..05c1930583f1a7a53091b15f8aeb14b9685b3db7
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f0410315d45d92555668125a50a258d07df97d1
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
@@ -0,0 +1,315 @@
+# Base names of all SparseMatrix / SparseMatrixView documentation examples.
+# For each <name> in this list:
+#   * with BUILD_CUDA, the executable <name>_cuda is built from <name>.cu,
+#   * otherwise the executable <name> is built from <name>.cpp,
+#   * the standard output of the executable is redirected to
+#     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<name>.out.
+# To add a new example, add its base name here; nothing else has to be edited.
+SET( SPARSE_MATRIX_EXAMPLES
+   SparseMatrixExample_Constructor_init_list_1
+   SparseMatrixExample_Constructor_init_list_2
+   SparseMatrixExample_Constructor_std_map
+   SparseMatrixExample_getSerializationType
+   SparseMatrixExample_setRowCapacities
+   SparseMatrixExample_setElements
+   SparseMatrixExample_setElements_map
+   SparseMatrixExample_getCompressedRowLengths
+   SparseMatrixExample_getConstRow
+   SparseMatrixExample_getRow
+   SparseMatrixExample_setElement
+   SparseMatrixExample_addElement
+   SparseMatrixExample_getElement
+   SparseMatrixExample_rowsReduction
+   SparseMatrixExample_allRowsReduction
+   SparseMatrixExample_forRows
+   SparseMatrixExample_forAllRows
+   SparseMatrixViewExample_getSerializationType
+   SparseMatrixViewExample_getCompressedRowLengths
+   SparseMatrixViewExample_getConstRow
+   SparseMatrixViewExample_getRow
+   SparseMatrixViewExample_setElement
+   SparseMatrixViewExample_addElement
+   SparseMatrixViewExample_getElement
+   SparseMatrixViewExample_rowsReduction
+   SparseMatrixViewExample_allRowsReduction
+   SparseMatrixViewExample_forRows
+   SparseMatrixViewExample_forAllRows
+)
+
+# Outputs of the custom commands, collected in the same order as the examples
+# above so that the target below depends on every generated snippet.
+SET( SPARSE_MATRIX_EXAMPLES_OUTPUTS "" )
+
+FOREACH( EXAMPLE IN LISTS SPARSE_MATRIX_EXAMPLES )
+   IF( BUILD_CUDA )
+      SET( EXAMPLE_TARGET ${EXAMPLE}_cuda )
+      CUDA_ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cu )
+   ELSE()
+      SET( EXAMPLE_TARGET ${EXAMPLE} )
+      ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cpp )
+   ENDIF()
+
+   # Run the example and capture what it prints as a documentation snippet.
+   ADD_CUSTOM_COMMAND( COMMAND ${EXAMPLE_TARGET} >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${EXAMPLE}.out
+                       OUTPUT ${EXAMPLE}.out )
+   LIST( APPEND SPARSE_MATRIX_EXAMPLES_OUTPUTS ${EXAMPLE}.out )
+ENDFOREACH()
+
+# Part of the default build (ALL): building it runs every example listed above.
+ADD_CUSTOM_TARGET( RunSparseMatricesExamples ALL DEPENDS
+   ${SPARSE_MATRIX_EXAMPLES_OUTPUTS}
+)
+
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92524b173b8e3d1b03b6280ca101b6de415fe427
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp
@@ -0,0 +1,28 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+// Creates a sparse matrix from row capacities, fills its lower triangle and prints it.
+template< typename Device >
+void initializerListExample()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   MatrixType matrix { { 1, 2, 3, 4, 5 },   // row capacities
+                       6 };                 // number of matrix columns
+   for( int i = 0; i < matrix.getRows(); i++ )
+      for( int j = 0; j <= i; j++ )
+         matrix.setElement( i, j, i - j + 1 );
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();  // host run is unconditional
+#if defined( HAVE_CUDA )                            // device run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+   return 0;
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4afbecd7fbbc3cf0cb99df2cba261f6feb557037
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e14f0618887d2e64fee590ec2e1390fa3c39e77b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+// Builds a 5x5 sparse matrix from an explicit list of elements and prints it.
+template< typename Device >
+void initializerListExample()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   MatrixType matrix( 5,    // number of matrix rows
+                      5,    // number of matrix columns
+                      {     // matrix elements definition (presumably { row, column, value })
+      {  0,  0,  2.0 },
+      {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+      {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+      {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+      {  4,  4,  2.0 } } );
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();  // host run is unconditional
+#if defined( HAVE_CUDA )                            // device run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+   return 0;
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu
new file mode 120000
index 0000000000000000000000000000000000000000..112624ab390c7deac6e6e84eec346f7931d0a583
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_init_list_2.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8f64d0e9b5363e0845f9e392cd23b9c2057c5363
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
@@ -0,0 +1,38 @@
+#include <iostream>
+#include <map>
+#include <utility>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+// Collects matrix entries in a std::map (keys presumably ( row, column )) and builds a 5x5 sparse matrix from it.
+template< typename Device >
+void initializerListExample()
+{
+   std::map< std::pair< int, int >, double > map {
+      { { 0, 0 },  2.0 },
+      { { 1, 0 }, -1.0 },
+      { { 1, 1 },  2.0 },
+      { { 1, 2 }, -1.0 },
+      { { 2, 1 }, -1.0 },
+      { { 2, 2 },  2.0 },
+      { { 2, 3 }, -1.0 },
+      { { 3, 2 }, -1.0 },
+      { { 3, 3 },  2.0 },
+      { { 3, 4 }, -1.0 },
+      { { 4, 4 },  2.0 }
+   };
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, map );
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();  // host run is unconditional
+#if defined( HAVE_CUDA )                            // device run is compiled only when HAVE_CUDA is defined
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+   return 0;
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu
new file mode 120000
index 0000000000000000000000000000000000000000..3f08e48c77bb1fdeb5eb54a2e8c61748db90624c
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_std_map.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1b51f494f244ccf3e2054c25b4b77da12c9806ed
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
@@ -0,0 +1,30 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+// Puts i on the diagonal of a 5x5 sparse matrix, then calls addElement on every position; prints both states.
+template< typename Device >
+void addElements()
+{
+   const int size = 5;
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 5, 5, 5, 5, 5 }, size );
+   for( int diag = 0; diag < size; diag++ )
+      matrix.setElement( diag, diag, diag );
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   for( int row = 0; row < size; row++ )
+      for( int column = 0; column < size; column++ )
+         matrix.addElement( row, column, 1.0, 5.0 );
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();  // host run is unconditional
+#if defined( HAVE_CUDA )                 // device run is compiled only when HAVE_CUDA is defined
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif
+   return 0;
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2ec36ad7459e3947e785ac44b3a0718f37d86c2d
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu
@@ -0,0 +1 @@
+SparseMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ce31f09e42936f4c1a754b7cc94978a1e8ab1d4
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
@@ -0,0 +1,67 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <limits>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+/*** Computes the largest absolute value in every row of a small sparse matrix and prints it. */
+template< typename Device >
+void allRowsReduction()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   using VectorType = TNL::Containers::Vector< double, Device >;
+
+   MatrixType matrix( 5, 5, {
+      { 0, 0, 1 },
+      { 1, 1, 1 }, { 1, 2, 8 },
+      { 2, 2, 1 }, { 2, 3, 9 },
+      { 3, 3, 1 }, { 3, 4, 9 },
+      { 4, 4, 1 } } );
+
+   /***
+    * One result entry per matrix row; the keep lambda below writes through a view of this vector.
+    */
+   VectorType rowMax( matrix.getRows() );
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch: maps each matrix element to its absolute value.
+    */
+   auto fetchAbs = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce: returns the larger of the two given values.
+    */
+   auto reduceMax = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep: stores the value obtained for row rowIdx into rowMax via its view.
+    */
+   auto keepMax = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Run the reduction; lowest() is the last argument (presumably the initial value for the maximum).
+    */
+   matrix.allRowsReduction( fetchAbs, reduceMax, keepMax, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "All rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >();  // host run is unconditional
+#if defined( HAVE_CUDA )                      // device run is compiled only when HAVE_CUDA is defined
+   std::cout << "All rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >();
+#endif
+   return 0;
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f087b816fa0cd3b657956475bb9c4c0f3f9769dc
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
@@ -0,0 +1 @@
+SparseMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..739600539260bba9e11c703c83b2d56ed8a75ff7
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Fills a sparse matrix ( row capacities 1..5, 5 columns ) using forAllRows and prints it.
+template< typename Device >
+void forAllRowsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
+   // Row rowIdx was given capacity rowIdx + 1, so any slot with localIdx > rowIdx is padding that
+   // some matrix formats allocate on top of the request. Such slots are flagged, never written.
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;
+         value = rowIdx + localIdx;
+      }
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..51cc7bd49f1754bd7d04c05c448d852572599cef
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2330c2ca5d94439726dc4df53ef9977116d43de0
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Fills a sparse matrix ( row capacities 1..5, 5 columns ) using forRows and prints it.
+template< typename Device >
+void forRowsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
+   // Row rowIdx was given capacity rowIdx + 1, so any slot with localIdx > rowIdx is padding that
+   // some matrix formats allocate on top of the request. Such slots are flagged, never written.
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;
+         value = rowIdx + localIdx;
+      }
+   };
+   matrix.forRows( 0, matrix.getRows(), f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..87c20fbe0e9e4ca72cd80150073726e21813b0cf
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e56abe484cdcc5f14bcb35dfc8f16d53946dd683
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp
@@ -0,0 +1,34 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Builds a 5x5 lower-triangular matrix, prints it, then prints the vector passed to getCompressedRowLengths.
+template< typename Device >
+void getCompressedRowLengthsExample()
+{
+   using Matrix = TNL::Matrices::SparseMatrix< double, Device >;
+   using LengthsVector = TNL::Containers::Vector< int, Device >;
+   Matrix lowerTriangle( 5, 5 );
+   lowerTriangle.setElements( {
+      { 0, 0,  1 },
+      { 1, 0,  2 }, { 1, 1,  3 },
+      { 2, 0,  4 }, { 2, 1,  5 }, { 2, 2,  6 },
+      { 3, 0,  7 }, { 3, 1,  8 }, { 3, 2,  9 }, { 3, 3, 10 },
+      { 4, 0, 11 }, { 4, 1, 12 }, { 4, 2, 13 }, { 4, 3, 14 }, { 4, 4, 15 } } );
+   std::cout << lowerTriangle << std::endl;
+   LengthsVector lengths;
+   lowerTriangle.getCompressedRowLengths( lengths );
+   std::cout << "Compressed row lengths are: " << lengths << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Getting compressed row lengths on host: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Getting compressed row lengths on CUDA device: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..8fc20b77f27feda756430c7cfadc718cf1f81a71
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..747a3c8258bc856034bbe07ba3867ccdd7e8816a
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp
@@ -0,0 +1,52 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+// Sums the diagonal of a 5x5 lower-triangular matrix by reading one row per reduction index.
+template< typename Device >
+void getRowExample()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   using MatrixPointer = TNL::Pointers::SharedPointer< MatrixType >;
+   MatrixPointer matrix ( 5, 5 );
+   matrix->setElements( {
+      { 0, 0, 1 },
+      { 1, 0, 1 }, { 1, 1, 2 },
+      { 2, 0, 1 }, { 2, 1, 2 }, { 2, 2, 3 },
+      { 3, 0, 1 }, { 3, 1, 2 }, { 3, 2, 3 }, { 3, 3, 4 },
+      { 4, 0, 1 }, { 4, 1, 2 }, { 4, 2, 3 }, { 4, 3, 4 }, { 4, 4, 5 } } );
+
+   /***
+    * Row i holds the entries of columns 0..i, so the value read at local
+    * position i of row i is the diagonal one.
+    */
+   auto diagonalEntry = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      return matrix->getRow( rowIdx ).getValue( rowIdx );
+   };
+
+   /***
+    * With a CUDA device the smart pointers have to be synchronized before
+    * the lambda is used. SparseMatrixView avoids this step; see the
+    * SparseMatrixView::getConstRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   // The matrix trace is the sum of the diagonal entries.
+   int trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, diagonalEntry, 0 );
+   std::cout << "Matrix trace is " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..03a7d1e7a9b8ec37ac8f35bd297dec564693d8fd
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4db76597950f21c8897e3864916775fc353fe9e7
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp
@@ -0,0 +1,37 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+// Prints a 5x5 sparse matrix as a dense table, reading every entry with getElement.
+template< typename Device >
+void getElements()
+{
+   constexpr int size = 5;   // number of matrix rows and of matrix columns
+   using Matrix = TNL::Matrices::SparseMatrix< double, Device >;
+   Matrix matrix( size, size, {
+      {  0,  0,  2.0 },
+      {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+      {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+      {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+      {  4,  4,  2.0 } } );
+
+   // Walk the whole index range; getElement is queried for positions that were never set as well.
+   for( int row = 0; row < size; ++row )
+   {
+      for( int column = 0; column < size; ++column )
+         std::cout << std::setw( 5 ) << matrix.getElement( row, column );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..bb3bc66362385752f09020b95a7d7c00d890af76
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d52602f082953a13d8a6fa54fcc01e52f0a6c749
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp
@@ -0,0 +1,45 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+// Stores 10 * ( rowIdx + 1 ) in column rowIdx of every row, using per-row accessors, and prints the matrix.
+template< typename Device >
+void getRowExample()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   using MatrixPointer = TNL::Pointers::SharedPointer< MatrixType >;
+   /***
+    * The variadic templates in the SharedPointer constructor do not recognize
+    * a braced list, so the capacities are bound to a named initializer list first.
+    */
+   auto capacities = { 1, 1, 1, 1, 1 };
+   MatrixPointer matrix( capacities, 5 );
+
+   auto setDiagonal = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      matrix->getRow( rowIdx ).setElement( 0, rowIdx, 10 * ( rowIdx + 1 ) );
+   };
+
+   /***
+    * With a CUDA device the smart pointers have to be synchronized first.
+    * SparseMatrixView avoids this; see the SparseMatrixView::getRow example.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   // Run the lambda once for every row index, then print the result.
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), setDiagonal );
+   std::cout << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..776f8a9d3819ec6fb8392a9373aa8e761996f8f2
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..35dbf04063743896ce8d8b2e7e31d8c5721b6129
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
@@ -0,0 +1,23 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+// Prints the serialization type string of a default-constructed sparse matrix on its own line.
+template< typename Device >
+void getSerializationTypeExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix;
+   std::cout << "Matrix type is: " << matrix.getSerializationType() << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Get serialization type on CPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Get serialization type on CUDA GPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a042183a0a0203b339c25d9aab429518570f9689
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getSerializationType.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..20279888a38b10ff41dfef49a44cb4a546f19359
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
@@ -0,0 +1,66 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+// Computes the largest absolute value in each row of a 5x5 matrix using rowsReduction.
+template< typename Device >
+void rowsReduction()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
+      { 0, 0, 1 },
+      { 1, 1, 1 }, { 1, 2, 8 },
+      { 2, 2, 1 }, { 2, 3, 9 },
+      { 3, 3, 1 }, { 3, 4, 9 },
+      { 4, 4, 1 } } );
+
+   /***
+    * Vector with one entry per matrix row; it receives the row maxima.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a vector view that the lambdas can capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch lambda returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce lambda returns the maximum of the two given values. It only
+    * reads its arguments, so both are taken by const reference.
+    */
+   auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep lambda stores the reduced value of each row in the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   // Compute the largest value in each row; lowest() is the identity element of max.
+   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..212f162886a3d0bb77aca1b75d7596048409df8b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu
@@ -0,0 +1 @@
+SparseMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..178e502dca1fe4b249397173fda21305e3e152e6
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp
@@ -0,0 +1,45 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+
+// Sets the diagonal of a 5x5 matrix first from host code, then from a lambda executed on Device.
+template< typename Device >
+void setElements()
+{
+   using MatrixPointer = TNL::Pointers::SharedPointer< TNL::Matrices::SparseMatrix< double, Device > >;
+   auto capacities = { 1, 1, 1, 1, 1 };
+   MatrixPointer matrix( capacities, 5 );
+   for( int diag = 0; diag < 5; ++diag )
+      matrix->setElement( diag, diag, diag );
+
+   std::cout << "Matrix set from the host:" << std::endl;
+   std::cout << *matrix << std::endl;
+
+   auto negateDiagonal = [=] __cuda_callable__ ( int i ) mutable {
+      matrix->setElement( i, i, -i );
+   };
+   /***
+    * With a CUDA device the smart pointers have to be synchronized before the
+    * lambda runs. SparseMatrixView avoids this; see the SparseMatrixView examples.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, negateDiagonal );
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..97b115c3fe344d86091e530bd9b8fe7fd63bced5
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu
@@ -0,0 +1 @@
+SparseMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ff0c0bde8aeada867578c43c1e2e2da3fd90ca8f
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp
@@ -0,0 +1,29 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Creates an empty 5x5 sparse matrix, fills it from ( row, column, value ) triples and prints it.
+template< typename Device >
+void setElementsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( 5, 5 );
+   matrix.setElements( {
+      {  0,  0,  2.0 },
+      {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+      {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+      {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+      {  4,  4,  2.0 } } );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Setting matrix elements on host: " << std::endl;
+   setElementsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Setting matrix elements on CUDA device: " << std::endl;
+   setElementsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..dfbad6b2724a487f50f94174bc93aee084c26729
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu
@@ -0,0 +1 @@
+SparseMatrixExample_setElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c7bdbbc1738a370fd9120cb461312b6bcc275b6e
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <map>
+#include <utility>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+// Gathers the matrix entries in a std::map keyed by ( row, column ) and passes the map to setElements.
+template< typename Device >
+void setElementsExample()
+{
+   std::map< std::pair< int, int >, double > entries{
+      { { 0, 0 },  2.0 },
+      { { 1, 0 }, -1.0 },
+      { { 1, 1 },  2.0 },
+      { { 1, 2 }, -1.0 },
+      { { 2, 1 }, -1.0 },
+      { { 2, 2 },  2.0 },
+      { { 2, 3 }, -1.0 },
+      { { 3, 2 }, -1.0 },
+      { { 3, 3 },  2.0 },
+      { { 3, 4 }, -1.0 },
+      { { 4, 4 },  2.0 }
+   };
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5 );
+   matrix.setElements( entries );
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   setElementsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   setElementsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu
new file mode 120000
index 0000000000000000000000000000000000000000..aaf831b5d5e38e29dfeb37589597c69c1469e2e9
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu
@@ -0,0 +1 @@
+SparseMatrixExample_setElements_map.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f282aee6d724c01925d83ee9c9ec79ac3d1a8a66
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp
@@ -0,0 +1,29 @@
+#include <iostream>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Sets row capacities 1..5 on a 5x5 matrix, fills its lower triangle element by element and prints it.
+template< typename Device >
+void setRowCapacitiesExample()
+{
+   TNL::Containers::Vector< int, Device > capacities{ 1, 2, 3, 4, 5 };
+   TNL::Matrices::SparseMatrix< double, Device > matrix( 5, 5 );
+   matrix.setRowCapacities( capacities );
+   for( int i = 0; i < 5; ++i )
+      for( int j = 0; j <= i; ++j )
+         matrix.setElement( i, j, i - j + 1 );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   setRowCapacitiesExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   setRowCapacitiesExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu
new file mode 120000
index 0000000000000000000000000000000000000000..77bb1a91f96edc23a4e80b591dc123ef59230077
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu
@@ -0,0 +1 @@
+SparseMatrixExample_setRowCapacities.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9871885195ea40c53ca8b4be637972bb501e1e79
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+// Sets the diagonal through a matrix view, then calls addElement( i, j, 1.0, 5.0 ) for every position.
+template< typename Device >
+void addElements()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 5, 5, 5, 5, 5 }, 5 );
+   auto matrixView = matrix.getView();
+   for( int diag = 0; diag < 5; ++diag )
+      matrixView.setElement( diag, diag, diag );
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   // Visit all 25 positions; every row was created with capacity for 5 entries.
+   for( int row = 0; row < 5; ++row )
+      for( int column = 0; column < 5; ++column )
+         matrixView.addElement( row, column, 1.0, 5.0 );
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..505aa14b1004cda30a853c9d74616bf06cb75758
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..98a03b941697c10c29609febfb5be0a01cd635cb
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
@@ -0,0 +1,68 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Computes the largest absolute value in each row of a 5x5 matrix through a matrix view.
+template< typename Device >
+void allRowsReduction()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
+      { 0, 0, 1 },
+      { 1, 1, 1 }, { 1, 2, 8 },
+      { 2, 2, 1 }, { 2, 3, 9 },
+      { 3, 3, 1 }, { 3, 4, 9 },
+      { 4, 4, 1 } } );
+   auto matrixView = matrix.getView();
+
+   /***
+    * Vector with one entry per matrix row; it receives the row maxima.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a vector view that the lambdas can capture by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch lambda returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce lambda returns the maximum of the two given values. It only
+    * reads its arguments, so both are taken by const reference.
+    */
+   auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep lambda stores the reduced value of each row in the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   // Compute the largest value in each row; lowest() is the identity element of max.
+   matrixView.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "All rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "All rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..d63cf05c5786aa73da2585050804d26340c75a9f
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fda71a42ff2cbf17520e36ca1390f311441a0c98
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp
@@ -0,0 +1,36 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+// Fills a sparse matrix ( row capacities 1..5, 5 columns ) through its view using forAllRows and prints it.
+template< typename Device >
+void forAllRowsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
+   auto view = matrix.getView();
+   // Row rowIdx was given capacity rowIdx + 1, so any slot with localIdx > rowIdx is padding that
+   // some matrix formats allocate on top of the request. Such slots are flagged, never written.
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;
+         value = rowIdx + localIdx;
+      }
+   };
+   view.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only with HAVE_CUDA.
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..dd77d11f9d9a75474a5e880d5167ff2a3640ba6b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..987c3dec4364fac94cf9c25f2dd7c4aa8493f184
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
@@ -0,0 +1,36 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >  // Device selects where the matrix is stored; main() uses Host and, optionally, Cuda.
+void forRowsExample()        // Fills a sparse matrix through its view with forRows() and prints the matrix.
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );  // presumably: capacity i + 1 for row i, 5 columns
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )  // This is important, some matrix formats may allocate more matrix elements
+                               // than we requested. These padding elements are processed here as well.
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;       // columnIdx is written here, so the test above must not read it
+         value = rowIdx + localIdx;
+      }
+   };
+
+   view.forRows( 0, matrix.getRows(), f );  // row range ( 0, getRows() ), i.e. the whole matrix
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix on host: " << std::endl;  // the host variant is always run
+   forRowsExample< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5058dc6cfd7adb63f9d10d2699d6b9b530fd6c90
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0cc12da83447dffca728a0ed26346c208b7122fe
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >  // Device selects where the matrix and the result vector are stored.
+void getCompressedRowLengthsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > lowerTriangular( 5, 5 );
+   lowerTriangular.setElements( {  // entries are written as { row, column, value }
+      { 0, 0,  1 },
+      { 1, 0,  2 }, { 1, 1,  3 },
+      { 2, 0,  4 }, { 2, 1,  5 }, { 2, 2,  6 },
+      { 3, 0,  7 }, { 3, 1,  8 }, { 3, 2,  9 }, { 3, 3, 10 },
+      { 4, 0, 11 }, { 4, 1, 12 }, { 4, 2, 13 }, { 4, 3, 14 }, { 4, 4, 15 } } );
+
+   std::cout << lowerTriangular << std::endl;
+   // The row lengths are queried through a view of the matrix and land in a vector on the same Device.
+   TNL::Containers::Vector< int, Device > lengths;
+   auto matrixView = lowerTriangular.getView();
+   matrixView.getCompressedRowLengths( lengths );
+
+   std::cout << "Compressed row lengths are: " << lengths << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting compressed row lengths on host: " << std::endl;  // the host variant is always run
+   getCompressedRowLengthsExample< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Getting compressed row lengths on CUDA device: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6fa51b25b0b105fac248451866d35f16b22683da
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c10b6a8040d50d25eff859dfb11b0db5a7ae70d
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp
@@ -0,0 +1,44 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+template< typename Device >
+void getRowExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > lowerTriangular ( 5, 5, {
+     { 0, 0, 1 },
+     { 1, 0, 1 }, { 1, 1, 2 },
+     { 2, 0, 1 }, { 2, 1, 2 }, { 2, 2, 3 },
+     { 3, 0, 1 }, { 3, 1, 2 }, { 3, 2, 3 }, { 3, 3, 4 },
+     { 4, 0, 1 }, { 4, 1, 2 }, { 4, 2, 3 }, { 4, 3, 4 }, { 4, 4, 5 } } );
+   auto view = lowerTriangular.getView();
+
+   // Yields the diagonal element of the given row. Row i of this matrix
+   // holds columns 0..i, so its diagonal element is the one at index i
+   // of the row obtained from the view (captured by value).
+   auto diagonalEntry = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      // NOTE(review): getValue() presumably takes a position within the row - confirm.
+      return view.getRow( rowIdx ).getValue( rowIdx );
+   };
+
+   // The matrix trace is obtained by adding up diagonalEntry( i ) over
+   // all row indexes with std::plus. The trailing 0 is presumably the
+   // initial value of the sum; the result is stored in an int.
+   int trace = TNL::Algorithms::Reduction< Device >::reduce( lowerTriangular.getRows(), std::plus<>{}, diagonalEntry, 0 );
+   std::cout << "Matrix trace is " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;  // the host variant is always run
+   getRowExample< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a1501e37864344438b6e2ceb064ce84f8943d49f
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e14134f3232a8acbb3840ce8b916c9bcbcf08bff
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp
@@ -0,0 +1,37 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >  // Device selects where the matrix is stored.
+void getElements()           // Prints a 5x5 table of the values returned by the view's getElement().
+{
+   TNL::Matrices::SparseMatrix< double, Device > sparse (
+      5, // row count
+      5, // column count
+      {  // stored entries, each written as { row, column, value }
+         {  0,  0,  2.0 },
+         {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+         {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+         {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+         {  4,  4,  2.0 } } );
+   auto sparseView = sparse.getView();
+
+   for( int row = 0; row < 5; ++row )  // one output line per matrix row
+   {
+      for( int column = 0; column < 5; ++column )  // every column is queried, stored or not
+         std::cout << std::setw( 5 ) << sparseView.getElement( row, column );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Get elements on host:" << std::endl;  // the host variant is always run
+   getElements< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..03d5d32f23fb9439a210fc6aa718212a1fa513a1
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..569fabb6af88457c9820aa8fb483b8f2eeb3fb70
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+template< typename Device >
+void getRowExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > diagonal( { 1, 1, 1, 1, 1 }, 5 );
+   auto diagonalView = diagonal.getView();
+
+   // Obtains row rowIdx from the view and writes 10 * ( rowIdx + 1 ) into it;
+   // the arguments are presumably ( position in row, column, value ).
+   auto fillRow = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      auto rowView = diagonalView.getRow( rowIdx );
+      rowView.setElement( 0, rowIdx, 10 * ( rowIdx + 1 ) );
+   };
+
+   // Invoke fillRow through ParallelFor on Device with the index range
+   // ( 0, number of matrix rows ), then print the resulting matrix.
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, diagonal.getRows(), fillRow );
+   std::cout << diagonal << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;  // the host variant is always run
+   getRowExample< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..56b0f7e6275b55114ff304dafa40e2fbe3c80713
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3147bd2919eaea3188366c943b0239d385ac6e63
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+template< typename Device >  // Device: device tag of the default-constructed matrix whose type string is printed.
+void getSerializationTypeExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix;
+   auto view = matrix.getView();
+   // Terminate the line: main() prints another banner right after this call when HAVE_CUDA is defined.
+   std::cout << "Matrix type is: " << view.getSerializationType() << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Get serialization type on CPU ... " << std::endl;  // the host variant is always run
+   getSerializationTypeExample< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Get serialization type on CUDA GPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu
new file mode 120000
index 0000000000000000000000000000000000000000..9ddc5c6f29d86ca559c745504a2acbeb9fa5e005
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_getSerializationType.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..69e2ff6fd2cd77d06235e143b44d854da4de414b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
@@ -0,0 +1,67 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   TNL::Matrices::SparseMatrix< double, Device > upperBidiagonal ( 5, 5, {
+      { 0, 0, 1 },
+      { 1, 1, 1 }, { 1, 2, 8 },
+      { 2, 2, 1 }, { 2, 3, 9 },
+      { 3, 3, 1 }, { 3, 4, 9 },
+      { 4, 4, 1 } } );
+   auto view = upperBidiagonal.getView();
+
+   // Goal of the example: find the largest absolute value stored in
+   // each matrix row. The results are collected in a vector with one
+   // entry per row, living on the same Device as the matrix.
+   TNL::Containers::Vector< double, Device > maxPerRow( upperBidiagonal.getRows() );
+
+   // The lambdas below capture by value, so they are handed a view of
+   // the result vector rather than the vector itself; writes made via
+   // the view end up in maxPerRow.
+   auto maxPerRowView = maxPerRow.getView();
+
+   // Step 1 (fetch): turn one stored matrix element into the quantity
+   // being reduced - here simply its absolute value. The row and the
+   // column index are part of the signature but are not used.
+   auto absoluteValue = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   // Step 2 (reduce): combine two fetched quantities into one - here
+   // by returning the larger of the two, which yields the maximum
+   // over a row.
+   auto larger = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   // Step 3 (keep): receive the reduced value of row rowIdx and store
+   // it at position rowIdx of the result vector through the captured
+   // vector view.
+   auto storeRowResult = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      maxPerRowView[ rowIdx ] = value;
+   };
+
+   // Run the three steps for the row range ( 0, number of rows ). The
+   // last argument is presumably the initial value of each row's
+   // reduction; lowest() cannot exceed any fetched value.
+   view.rowsReduction( 0, upperBidiagonal.getRows(), absoluteValue, larger, storeRowResult, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << upperBidiagonal << std::endl;
+   std::cout << "Max. elements in rows are: " << maxPerRow << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Rows reduction on host:" << std::endl;  // the host variant is always run
+   rowsReduction< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f244c8372ea90bd142f8fae912ae48d32d3be0fa
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3de6634a3bdd492135e83badc0d1febf02e9b5d3
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+
+template< typename Device >
+void setElements()
+{
+   TNL::Matrices::SparseMatrix< double, Device > diagonal( { 1, 1, 1, 1, 1 }, 5 );
+   auto diagonalView = diagonal.getView();
+
+   // First pass: the view's setElement() is called from ordinary host code.
+   for( int idx = 0; idx < 5; ++idx )
+      diagonalView.setElement( idx, idx, idx );
+   std::cout << "Matrix set from the host:" << std::endl << diagonal << std::endl;
+
+   // Second pass: the same call is issued from a __cuda_callable__ lambda that
+   // ParallelFor executes for the index range ( 0, 5 ) on Device; entry ( i, i ) becomes -i.
+   auto negateDiagonal = [=] __cuda_callable__ ( int i ) mutable {
+      diagonalView.setElement( i, i, -i );
+   };
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, negateDiagonal );
+
+   std::cout << "Matrix set from its native device:" << std::endl << diagonal << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Set elements on host:" << std::endl;  // the host variant is always run
+   setElements< TNL::Devices::Host >();
+
+#if defined( HAVE_CUDA )  // the CUDA variant is compiled in only when HAVE_CUDA is defined
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..3dba0b9ec39a9e39526e94415eefb06411c48d02
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f66e71a4ab43a13ceec55d7d343ff1045acd48d
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt
@@ -0,0 +1,269 @@
+IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_Constructor_init_list_1_cuda TridiagonalMatrixExample_Constructor_init_list_1.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_Constructor_init_list_1_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_Constructor_init_list_1.out
+                       OUTPUT TridiagonalMatrixExample_Constructor_init_list_1.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getSerializationType_cuda TridiagonalMatrixExample_getSerializationType.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getSerializationType_cuda > 
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getSerializationType.out
+                       OUTPUT TridiagonalMatrixExample_getSerializationType.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_setElements_cuda TridiagonalMatrixExample_setElements.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElements_cuda > 
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElements.out
+                       OUTPUT TridiagonalMatrixExample_setElements.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getCompressedRowLengths_cuda TridiagonalMatrixExample_getCompressedRowLengths.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getCompressedRowLengths_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getCompressedRowLengths.out
+                       OUTPUT TridiagonalMatrixExample_getCompressedRowLengths.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getConstRow_cuda TridiagonalMatrixExample_getConstRow.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getConstRow_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getConstRow.out
+                       OUTPUT TridiagonalMatrixExample_getConstRow.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getRow_cuda TridiagonalMatrixExample_getRow.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getRow_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getRow.out
+                       OUTPUT TridiagonalMatrixExample_getRow.out )
+
+# This example does not work with nvcc 10.1. Restore it here when it works.
+#   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_setElement_cuda TridiagonalMatrixExample_setElement.cu )
+#   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement_cuda >
+#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
+#                       OUTPUT TridiagonalMatrixExample_setElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_addElement_cuda TridiagonalMatrixExample_addElement.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_addElement_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_addElement.out
+                       OUTPUT TridiagonalMatrixExample_addElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getElement_cuda TridiagonalMatrixExample_getElement.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getElement_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getElement.out
+                       OUTPUT TridiagonalMatrixExample_getElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_rowsReduction_cuda TridiagonalMatrixExample_rowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_rowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_rowsReduction.out
+                       OUTPUT TridiagonalMatrixExample_rowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_allRowsReduction_cuda TridiagonalMatrixExample_allRowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_allRowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_allRowsReduction.out
+                       OUTPUT TridiagonalMatrixExample_allRowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_forRows_cuda TridiagonalMatrixExample_forRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forRows.out
+                       OUTPUT TridiagonalMatrixExample_forRows.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_forAllRows_cuda TridiagonalMatrixExample_forAllRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forAllRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forAllRows.out
+                       OUTPUT TridiagonalMatrixExample_forAllRows.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getCompressedRowLengths_cuda TridiagonalMatrixViewExample_getCompressedRowLengths.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getCompressedRowLengths_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getCompressedRowLengths.out
+                       OUTPUT TridiagonalMatrixViewExample_getCompressedRowLengths.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getConstRow_cuda TridiagonalMatrixViewExample_getConstRow.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getConstRow_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getConstRow.out
+                       OUTPUT TridiagonalMatrixViewExample_getConstRow.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getRow_cuda TridiagonalMatrixViewExample_getRow.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getRow_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getRow.out
+                       OUTPUT TridiagonalMatrixViewExample_getRow.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_setElement_cuda TridiagonalMatrixViewExample_setElement.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_setElement_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_setElement.out
+                       OUTPUT TridiagonalMatrixViewExample_setElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_addElement_cuda TridiagonalMatrixViewExample_addElement.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_addElement_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_addElement.out
+                       OUTPUT TridiagonalMatrixViewExample_addElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getElement_cuda TridiagonalMatrixViewExample_getElement.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getElement_cuda >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getElement.out
+                       OUTPUT TridiagonalMatrixViewExample_getElement.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_rowsReduction_cuda TridiagonalMatrixViewExample_rowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_rowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_rowsReduction.out
+                       OUTPUT TridiagonalMatrixViewExample_rowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_allRowsReduction_cuda TridiagonalMatrixViewExample_allRowsReduction.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_allRowsReduction_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_allRowsReduction.out
+                       OUTPUT TridiagonalMatrixViewExample_allRowsReduction.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_forRows_cuda TridiagonalMatrixViewExample_forRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forRows.out
+                       OUTPUT TridiagonalMatrixViewExample_forRows.out )
+
+   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_forAllRows_cuda TridiagonalMatrixViewExample_forAllRows.cu )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forAllRows_cuda >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forAllRows.out
+                       OUTPUT TridiagonalMatrixViewExample_forAllRows.out )
+
+ELSE()
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_Constructor_init_list_1 TridiagonalMatrixExample_Constructor_init_list_1.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_Constructor_init_list_1 >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_Constructor_init_list_1.out
+                       OUTPUT TridiagonalMatrixExample_Constructor_init_list_1.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_getSerializationType TridiagonalMatrixExample_getSerializationType.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getSerializationType > 
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getSerializationType.out
+                       OUTPUT TridiagonalMatrixExample_getSerializationType.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_setElements TridiagonalMatrixExample_setElements.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElements > 
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElements.out
+                       OUTPUT TridiagonalMatrixExample_setElements.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_getCompressedRowLengths TridiagonalMatrixExample_getCompressedRowLengths.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getCompressedRowLengths >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getCompressedRowLengths.out
+                       OUTPUT TridiagonalMatrixExample_getCompressedRowLengths.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_getConstRow TridiagonalMatrixExample_getConstRow.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getConstRow >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getConstRow.out
+                       OUTPUT TridiagonalMatrixExample_getConstRow.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_getRow TridiagonalMatrixExample_getRow.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getRow >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getRow.out
+                       OUTPUT TridiagonalMatrixExample_getRow.out )
+
+#  This example does not work with nvcc 10.1. Restore it here when it works.
+#   ADD_EXECUTABLE( TridiagonalMatrixExample_setElement TridiagonalMatrixExample_setElement.cpp )
+#   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement >
+#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
+#                       OUTPUT TridiagonalMatrixExample_setElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_addElement TridiagonalMatrixExample_addElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_addElement >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_addElement.out
+                       OUTPUT TridiagonalMatrixExample_addElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_getElement TridiagonalMatrixExample_getElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getElement >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getElement.out
+                       OUTPUT TridiagonalMatrixExample_getElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_rowsReduction TridiagonalMatrixExample_rowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_rowsReduction >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_rowsReduction.out
+                       OUTPUT TridiagonalMatrixExample_rowsReduction.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixExample_allRowsReduction TridiagonalMatrixExample_allRowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_allRowsReduction >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_allRowsReduction.out
+                       OUTPUT TridiagonalMatrixExample_allRowsReduction.out )
+   # Same pattern for every example below: build it from its .cpp file, run it and redirect its stdout into the snippets directory.
+   ADD_EXECUTABLE( TridiagonalMatrixExample_forRows TridiagonalMatrixExample_forRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forRows >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forRows.out
+                       OUTPUT TridiagonalMatrixExample_forRows.out )
+   # NOTE(review): OUTPUT is a relative name while the command writes under TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH - presumably intentional; confirm.
+   ADD_EXECUTABLE( TridiagonalMatrixExample_forAllRows TridiagonalMatrixExample_forAllRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forAllRows >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forAllRows.out
+                       OUTPUT TridiagonalMatrixExample_forAllRows.out )
+   # Examples for TridiagonalMatrixView follow.
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getCompressedRowLengths TridiagonalMatrixViewExample_getCompressedRowLengths.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getCompressedRowLengths >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getCompressedRowLengths.out
+                       OUTPUT TridiagonalMatrixViewExample_getCompressedRowLengths.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getConstRow TridiagonalMatrixViewExample_getConstRow.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getConstRow >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getConstRow.out
+                       OUTPUT TridiagonalMatrixViewExample_getConstRow.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getRow TridiagonalMatrixViewExample_getRow.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getRow >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getRow.out
+                       OUTPUT TridiagonalMatrixViewExample_getRow.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_setElement TridiagonalMatrixViewExample_setElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_setElement >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_setElement.out
+                       OUTPUT TridiagonalMatrixViewExample_setElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_addElement TridiagonalMatrixViewExample_addElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_addElement >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_addElement.out
+                       OUTPUT TridiagonalMatrixViewExample_addElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getElement TridiagonalMatrixViewExample_getElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getElement >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getElement.out
+                       OUTPUT TridiagonalMatrixViewExample_getElement.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_rowsReduction TridiagonalMatrixViewExample_rowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_rowsReduction >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_rowsReduction.out
+                       OUTPUT TridiagonalMatrixViewExample_rowsReduction.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_allRowsReduction TridiagonalMatrixViewExample_allRowsReduction.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_allRowsReduction >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_allRowsReduction.out
+                       OUTPUT TridiagonalMatrixViewExample_allRowsReduction.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_forRows TridiagonalMatrixViewExample_forRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forRows >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forRows.out
+                       OUTPUT TridiagonalMatrixViewExample_forRows.out )
+
+   ADD_EXECUTABLE( TridiagonalMatrixViewExample_forAllRows TridiagonalMatrixViewExample_forAllRows.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forAllRows >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forAllRows.out
+                       OUTPUT TridiagonalMatrixViewExample_forAllRows.out )
+
+ENDIF()
+# NOTE(review): registered after ENDIF(), so this example is always built from the .cpp source - presumably on purpose; confirm.
+   ADD_EXECUTABLE( TridiagonalMatrixExample_setElement TridiagonalMatrixExample_setElement.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement >
+                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
+                       OUTPUT TridiagonalMatrixExample_setElement.out )
+
+# Aggregate target included in the default build (ALL); it depends on the .out file of every tridiagonal example listed below.
+ADD_CUSTOM_TARGET( RunTridiagonalMatricesExamples ALL DEPENDS
+   TridiagonalMatrixExample_Constructor_init_list_1.out
+   TridiagonalMatrixExample_getSerializationType.out
+   TridiagonalMatrixExample_setElements.out
+   TridiagonalMatrixExample_getCompressedRowLengths.out
+   TridiagonalMatrixExample_getConstRow.out
+   TridiagonalMatrixExample_getRow.out
+   TridiagonalMatrixExample_setElement.out
+   TridiagonalMatrixExample_addElement.out
+   TridiagonalMatrixExample_getElement.out
+   TridiagonalMatrixExample_rowsReduction.out
+   TridiagonalMatrixExample_allRowsReduction.out
+   TridiagonalMatrixExample_forRows.out
+   TridiagonalMatrixExample_forAllRows.out
+   TridiagonalMatrixViewExample_getCompressedRowLengths.out
+   TridiagonalMatrixViewExample_getConstRow.out
+   TridiagonalMatrixViewExample_getRow.out
+   TridiagonalMatrixViewExample_setElement.out
+   TridiagonalMatrixViewExample_addElement.out
+   TridiagonalMatrixViewExample_getElement.out
+   TridiagonalMatrixViewExample_rowsReduction.out
+   TridiagonalMatrixViewExample_allRowsReduction.out
+   TridiagonalMatrixViewExample_forRows.out
+   TridiagonalMatrixViewExample_forAllRows.out
+)
+
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c4d8c893473cc2957cf710db5ffb451a0cfb94a9
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp
@@ -0,0 +1,58 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void createTridiagonalMatrix()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   const int size( 6 );
+   /***
+    * The example builds this 6x6 matrix (a dot stands for a zero):
+    *
+    * /  2 -1  .  .  .  . \
+    * | -1  2 -1  .  .  . |
+    * |  . -1  2 -1  .  . |
+    * |  .  . -1  2 -1  . |
+    * |  .  .  . -1  2 -1 |
+    * \  .  .  .  . -1  2 /
+    *
+    */
+   MatrixType matrix(
+      size, {
+   /***
+    * Each row is passed as { below-diagonal, diagonal, above-diagonal }.
+    * The diagonals are extended to full length even outside the matrix, so
+    * the first and the last row carry one artificial (padding) zero:
+    *
+    * 0 /  2 -1  .  .  .  . \    -> {  0,  2, -1 }
+    *   | -1  2 -1  .  .  . |    -> { -1,  2, -1 }
+    *   |  . -1  2 -1  .  . |    -> { -1,  2, -1 }
+    *   |  .  . -1  2 -1  . |    -> { -1,  2, -1 }
+    *   |  .  .  . -1  2 -1 |    -> { -1,  2, -1 }
+    *   \  .  .  .  . -1  2 / 0  -> { -1,  2,  0 }
+    *
+    */
+      {  0,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2,  0 }
+      } );
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Creating tridiagonal matrix on CPU ... " << std::endl;
+   createTridiagonalMatrix< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating tridiagonal matrix on CUDA GPU ... " << std::endl;
+   createTridiagonalMatrix< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5c6620a5d4f6c05bfee3ef8aaee8df692467940b
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..66876f1e343430df0b0863119969e1702baba1b6
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void addElements()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   const int size = 5;
+   MatrixType matrix( size, size );   // number of rows, number of columns
+
+   // Start with the row index stored on the main diagonal.
+   for( int row = 0; row < size; ++row )
+      matrix.setElement( row, row, row );
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   // NOTE(review): the last argument (5.0) presumably scales the current value before 1.0 is added - confirm.
+   const int lastRow = size - 1;
+   for( int row = 0; row <= lastRow; ++row ) {
+      if( row > 0 )
+         matrix.addElement( row, row - 1, 1.0, 5.0 );
+      matrix.addElement( row, row, 1.0, 5.0 );
+      if( row < lastRow )
+         matrix.addElement( row, row + 1, 1.0, 5.0 );
+   }
+
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..be78eddc3fca4e9dadf0848f56ef93ca832362f6
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..043dcc82efc2203f8f9b52a4a90a70d7a1e25ae9
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
@@ -0,0 +1,78 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Compute the largest absolute value in each row of the following matrix
+    * (dots are zero matrix elements, zeros are padding for memory alignment):
+    * 
+    *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *    | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *    | .  2  1  3  . |   -> { 2, 1, 3 }
+    *    | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    * 
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { { 0, 1, 3 },  // matrix elements
+        { 2, 1, 3 }, 
+        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 0 } } );
+
+   /***
+    * Vector receiving the largest element of each row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare vector view for lambdas.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce lambda returns the maximum of the given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep lambda stores the largest value of each row in the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest values in all rows; lowest() is the start value for max.
+    */
+   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..795c4febffbbb109b99553e8437c33db942bcab7
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ff3fdee91c080afd212718ddbf7159ab6f479164
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp
@@ -0,0 +1,53 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment, as in the original figure):
+    *
+    * 0 / 2  1  .  .  . \   -> { 0, 2, 1 }
+    *   | 3  2  1  .  . |   -> { 3, 2, 1 }
+    *   | .  3  2  1  . |   -> { 3, 2, 1 }
+    *   | .  .  3  2  1 |   -> { 3, 2, 1 }
+    *   \ .  .  .  3  2 / 0 -> { 3, 2, 0 }
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix(
+      5,      // number of matrix rows
+      5 );    // number of matrix columns
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * 'forAllRows' method iterates only over matrix elements lying on given subdiagonals
+       * and so we do not need to check anything. The element value is derived from
+       * 'localIdx' (3 - localIdx, i.e. 3 below, 2 on and 1 above the diagonal):
+       *
+       *                           0  1  2  <- localIdx values
+       *                           -------
+       * 0 / 2  1  .  .  . \   -> { 0, 2, 1 }
+       *   | 3  2  1  .  . |   -> { 3, 2, 1 }
+       *   | .  3  2  1  . |   -> { 3, 2, 1 }
+       *   | .  .  3  2  1 |   -> { 3, 2, 1 }
+       *   \ .  .  .  3  2 / 0 -> { 3, 2, 0 }
+       *
+       */
+      value = 3 - localIdx;
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..43736be3f83e86f2d7842191f76be12fb931e4a0
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ba17df51133e5f455f9e5d81af1d6e40a7e78fa
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
@@ -0,0 +1,53 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment, as in the original figure):
+    *
+    * 0 / 2  1  .  .  . \   -> { 0, 2, 1 }
+    *   | 3  2  1  .  . |   -> { 3, 2, 1 }
+    *   | .  3  2  1  . |   -> { 3, 2, 1 }
+    *   | .  .  3  2  1 |   -> { 3, 2, 1 }
+    *   \ .  .  .  3  2 / 0 -> { 3, 2, 0 }
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix(
+      5,      // number of matrix rows
+      5 );    // number of matrix columns
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * 'forRows' method iterates only over matrix elements lying on given subdiagonals
+       * and so we do not need to check anything. The element value is derived from
+       * 'localIdx' (3 - localIdx, i.e. 3 below, 2 on and 1 above the diagonal):
+       *
+       *                           0  1  2  <- localIdx values
+       *                           -------
+       * 0 / 2  1  .  .  . \   -> { 0, 2, 1 }
+       *   | 3  2  1  .  . |   -> { 3, 2, 1 }
+       *   | .  3  2  1  . |   -> { 3, 2, 1 }
+       *   | .  .  3  2  1 |   -> { 3, 2, 1 }
+       *   \ .  .  .  3  2 / 0 -> { 3, 2, 0 }
+       *
+       */
+      value = 3 - localIdx;
+   };
+   matrix.forRows( 0, matrix.getRows(), f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a187b1e67da9619090be45c2ec69f6709bac9b88
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ebe40c5acaa56dcab44cb97465b127c5605ac592
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp
@@ -0,0 +1,40 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void laplaceOperatorMatrix()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   using RowLengthsVector = TNL::Containers::Vector< int, Device >;
+   const int gridSize = 6;
+   const int matrixSize( gridSize );
+   // Square matrix: the first argument is the number of rows, the second the number of columns.
+   MatrixType matrix( matrixSize, matrixSize );
+   matrix.setElements( {
+         {  0.0, 1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         {  0.0, 1.0 }
+      } );
+   RowLengthsVector rowLengths;
+   matrix.getCompressedRowLengths( rowLengths );
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Compressed row lengths: " << rowLengths << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..84d47b4466d3189b79ac8707818bed7a700b3c6b
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bac9303ae33ad662851b2038f31821fbb5404c56
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp
@@ -0,0 +1,57 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+
+template< typename Device >
+void getRowExample()
+{
+   const int matrixSize = 5;
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< MatrixType > matrix (
+      matrixSize,  // number of matrix rows
+      matrixSize   // number of matrix columns
+    );
+   matrix->setElements(
+      {  { 0.0, 2.0, 1.0 },
+         { 0.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 0.0, 2.0, 1.0 } } );
+
+   /***
+    * Fetch lambda function returns the diagonal element of the given row.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      auto row = matrix->getRow( rowIdx );
+      return row.getValue( 1 ); // local index 1 is the main diagonal (0 = below, 2 = above)
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use TridiagonalMatrixView. See
+    * TridiagonalMatrixView::getConstRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   /***
+    * Compute the matrix trace, accumulated in double like the matrix elements.
+    */
+   double trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, fetch, 0.0 );
+   std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl;
+   std::cout << "Matrix trace is: " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5bfb460721f6b205e52cd07b0f0a2fc0e550cf8d
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5ba2bf302701d5bdb6b69dcab69130481f8410c1
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp
@@ -0,0 +1,38 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void getElements()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   const int size = 5;
+   MatrixType matrix( size, {   // number of matrix columns, matrix elements definition
+         {  0.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0,  0.0 }
+      } );
+
+   /***
+    * Print every (row, column) entry obtained by getElement, one matrix row per line.
+    */
+   for( int row = 0; row < size; ++row ) {
+      for( int column = 0; column < size; ++column )
+         std::cout << std::setw( 5 ) << matrix.getElement( row, column );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a5089da5548b73dc67e4da9f1338eb963ef10d53
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cfbdce919b381572b8d1ce4b3614298811518713
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp
@@ -0,0 +1,58 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+// Example: fill a tridiagonal matrix row by row using getRow() inside a lambda executed by ParallelFor.
+template< typename Device >
+void getRowExample()
+{
+   const int matrixSize( 5 );
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< MatrixType > matrix(
+      matrixSize,  // number of matrix rows
+      matrixSize  // number of matrix columns
+   );
+   // Functor called once per row index by ParallelFor below.
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      //auto row = matrix->getRow( rowIdx );    
+      // For some reason the previous line of code is not accepted by nvcc 10.1 
+      // so we replace it with the following two lines.
+      auto ref = matrix.modifyData();
+      auto row = ref.getRow( rowIdx );
+      // Local indexes within a row: 0 = below the diagonal, 1 = diagonal, 2 = above the diagonal.
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < matrixSize - 1 )  // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use TridiagonalMatrixView. See
+    * TridiagonalMatrixView::getRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   /***
+    * Set the matrix elements: run f for every row index in [ 0, number of rows ).
+    */
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f );
+   std::cout << std::endl << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // Only the host variant is executed; the CUDA variant below is intentionally disabled.
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   // It seems that nvcc 10.1 does not handle lambda functions properly.
+   // It is hard to make nvcc compile this example and it does not work
+   // properly. We will try it with a later version of CUDA.
+   //std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   //getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..3e31db140745bb4537a7bf618f46436d58e6a94e
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e76f87e1a2c048847d9fbd0d6d696eea8b6f401a
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp
@@ -0,0 +1,23 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+
+template< typename Device >
+void getSerializationTypeExample()
+{
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix;
+   // Print the serialization type reported by a default-constructed matrix; end the line so later output starts on a new one.
+   std::cout << "Matrix type is: " << matrix.getSerializationType() << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Get serialization type on CPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Get serialization type on CUDA GPU ... " << std::endl;
+   getSerializationTypeExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4feca977e13836256aa0a9af6243b3c42e4e72f3
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_getSerializationType.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..792dc98d386fa3797b2ee06c13262c1838359269
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
@@ -0,0 +1,78 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Compute the largest absolute value in each row of the following matrix
+    * (dots are zero matrix elements, zeros are padding for memory alignment):
+    * 
+    *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *    | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *    | .  2  1  3  . |   -> { 2, 1, 3 }
+    *    | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    * 
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { { 0, 1, 3 },  // matrix elements
+        { 2, 1, 3 }, 
+        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 0 } } );
+
+   /***
+    * Vector receiving the largest element of each row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare vector view for lambdas.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce lambda returns the maximum of the given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep lambda stores the largest value of each row in the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest values in rows [ 0, getRows() ); lowest() is the start value for max.
+    */
+   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   // The host run is unconditional; the CUDA run is compiled in only when HAVE_CUDA is defined.
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+#if defined( HAVE_CUDA )
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..69a58007cbcbfcf49bf5678a9b208274cb6de91b
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..03121d41bb72fea74e33d0552557d4175517c424
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp
@@ -0,0 +1,50 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Pointers/SmartPointersRegister.h>
+
+template< typename Device >
+void setElements()   // Fills a square tridiagonal matrix from the host, then overwrites it with ParallelFor running on Device.
+{
+   const int matrixSize( 5 );
+   using Matrix = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< Matrix > matrix( matrixSize, matrixSize );
+   for( int i = 0; i < matrixSize; i++ )   // bound follows matrixSize rather than repeating the literal 5
+      matrix->setElement( i, i, i );        // host-side write: diagonal entry of row i gets the value i
+
+   std::cout << "Matrix set from the host:" << std::endl;
+   std::cout << *matrix << std::endl;
+
+   auto f = [=] __cuda_callable__ ( int i ) mutable {   // the shared pointer is captured by value
+      if( i > 0 )                                        // row 0 has no entry left of the diagonal
+         matrix->setElement( i, i - 1, 1.0 );
+      matrix->setElement( i, i, -i );
+      if( i < matrixSize - 1 )                           // the last row has no entry right of the diagonal
+         matrix->setElement( i, i + 1, 1.0 );
+   };
+
+   /***
+    * When Device is a CUDA device, the smart pointers must be synchronized
+    * before the lambda runs. To avoid this you may use TridiagonalMatrixView;
+    * see the TridiagonalMatrixView::getRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f );   // calls f for i in [0, matrixSize)
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << *matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the setElement example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..c693c5c23789abd25e3bc6857234c91f3c28f815
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dcc6f3d2a06c45cf99fc4aec13b8239c5f6b810d
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp
@@ -0,0 +1,58 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void createTridiagonalMatrix()
+{
+   const int matrixSize = 6;
+
+   /***
+    * Set up the following matrix (dots represent zeros):
+    * 
+    * /  2 -1  .  .  .  . \
+    * | -1  2 -1  .  .  . |
+    * |  . -1  2 -1  .  . |
+    * |  .  . -1  2 -1  . |
+    * |  .  .  . -1  2 -1 |
+    * \  .  .  .  . -1  2 /
+    * 
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix( matrixSize, matrixSize );
+   matrix.setElements( {
+   /***
+    * To set the matrix elements, we first extend the diagonals to their full
+    * length, even outside the matrix (dots represent zeros; the zeros drawn
+    * outside the matrix are artificial zeros used for memory alignment):
+    * 
+    * 0 /  2 -1  .  .  .  . \    -> {  0,  2, -1 }
+    *   | -1  2 -1  .  .  . |    -> { -1,  2, -1 }
+    *   |  . -1  2 -1  .  . |    -> { -1,  2, -1 }
+    *   |  .  . -1  2 -1  . |    -> { -1,  2, -1 }
+    *   |  .  .  . -1  2 -1 |    -> { -1,  2, -1 }
+    *   \  .  .  .  . -1  2 / 0  -> { -1,  2,  0 }
+    * 
+    */
+      {  0,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2, -1 },
+      { -1,  2,  0 }
+      } );
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the setElements example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Creating tridiagonal matrix on CPU ... " << std::endl;
+   createTridiagonalMatrix< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating tridiagonal matrix on CUDA GPU ... " << std::endl;
+   createTridiagonalMatrix< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4a08aa9268694d0210eb961b5944c1b7a09ea607
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_setElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..183710215561b949f9aa625da3c179f0308548bc
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void addElements()
+{
+   // Demonstrates addElement on a view of a square matrixSize x matrixSize tridiagonal matrix.
+   const int matrixSize( 5 );
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix( matrixSize, matrixSize );  // rows, columns
+   auto view = matrix.getView();
+
+   for( int row = 0; row < matrixSize; row++ )   // start from the diagonal 0, 1, ..., matrixSize - 1
+      view.setElement( row, row, row );
+
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   for( int row = 0; row < matrixSize; row++ )
+   {
+      const bool hasLeft = row > 0;                 // row 0 has no entry left of the diagonal
+      const bool hasRight = row < matrixSize - 1;   // the last row has no entry right of the diagonal
+      if( hasLeft )
+         view.addElement( row, row - 1, 1.0, 5.0 );
+      view.addElement( row, row, 1.0, 5.0 );        // presumably old * 5.0 + 1.0; confirm against the addElement docs
+      if( hasRight )
+         view.addElement( row, row + 1, 1.0, 5.0 );
+   }
+
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the addElement example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..9eb313e8130c1e8117e4abf19c1e710d5673b88f
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bacb98beecc1ac87be73302896e60ef8e498bc4d
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
@@ -0,0 +1,79 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Set up the following matrix (dots represent zero matrix elements and the
+    * zeros drawn outside the matrix are padding zeros for memory alignment):
+    * 
+    *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *    | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *    | .  2  1  3  . |   -> { 2, 1, 3 }
+    *    | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    * NOTE(review): the last initializer row below is { 2, 1, 3 }, not { 2, 1, 0 } - confirm the trailing 3 is ignored.
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { { 0, 1, 3 },  // matrix elements
+        { 2, 1, 3 }, 
+        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 } } );
+   auto view = matrix.getView();
+
+   /***
+    * Vector that will receive the largest element of each row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a vector view so that the lambdas can capture it by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value reduced for a row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest value in each row; lowest() is presumably the initial value of the reduction (confirm).
+    */
+   view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the allRowsReduction example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7b330650fc19e8db1bd2c844ef405e2b79f3a133
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd889e1aff317821702c00ff301f2fa7e81c1c19
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set up the following matrix (dots represent zero matrix elements and the
+    * zeros drawn outside the matrix are padding zeros for memory alignment):
+    * 
+    * 0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *   | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *   | .  2  1  3  . |   -> { 2, 1, 3 }
+    *   | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *   \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix(
+      5,      // number of matrix rows
+      5 );    // number of matrix columns
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * The 'forAllRows' method iterates only over matrix elements lying on the given
+       * subdiagonals, so we do not need to check anything. The element value can be
+       * expressed by the 'localIdx' variable, see the following figure:
+       * 
+       *                           0  1  2  <- localIdx values
+       *                           -------
+       * 0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+       *   | 2  1  3  .  . |   -> { 2, 1, 3 }
+       *   | .  2  1  3  . |   -> { 2, 1, 3 }
+       *   | .  .  2  1  3 |   -> { 2, 1, 3 }
+       *   \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
+       * NOTE(review): 'value = 3 - localIdx' writes 3, 2, 1 for localIdx 0, 1, 2, not the { 2, 1, 3 } drawn above - confirm.
+       */
+      value = 3 - localIdx;
+   };
+   view.forAllRows( f );   // applies f through the view; the result is printed from 'matrix' below
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the forAllRows example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..fae2028882fb518b3b8d879c8aa29bf49c7fe652
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..24fe78f7f7f34a472e83ac3c060d9ba44171998b
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set up the following matrix (dots represent zero matrix elements and the
+    * zeros drawn outside the matrix are padding zeros for memory alignment):
+    * 
+    * 0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *   | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *   | .  2  1  3  . |   -> { 2, 1, 3 }
+    *   | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *   \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix(
+      5,      // number of matrix rows
+      5 );    // number of matrix columns
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) {
+      /***
+       * The 'forRows' method iterates only over matrix elements lying on the given
+       * subdiagonals, so we do not need to check anything. The element value can be
+       * expressed by the 'localIdx' variable, see the following figure:
+       * 
+       *                           0  1  2  <- localIdx values
+       *                           -------
+       * 0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+       *   | 2  1  3  .  . |   -> { 2, 1, 3 }
+       *   | .  2  1  3  . |   -> { 2, 1, 3 }
+       *   | .  .  2  1  3 |   -> { 2, 1, 3 }
+       *   \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
+       * NOTE(review): 'value = 3 - localIdx' writes 3, 2, 1 for localIdx 0, 1, 2, not the { 2, 1, 3 } drawn above - confirm.
+       */
+      value = 3 - localIdx;
+   };
+   view.forRows( 0, matrix.getRows(), f );   // row range starts at 0 and is bounded by matrix.getRows()
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the forRows example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ea70e5b9e29793bbfda1ea1eb88b61bfa141eb41
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f20811ea632c54d9e258b3b75e91e1c8547a2669
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp
@@ -0,0 +1,42 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void laplaceOperatorMatrix()   // Builds a 6x6 tridiagonal matrix and prints it together with its compressed row lengths.
+{
+   const int gridSize( 6 );
+   const int matrixSize = gridSize;   // one matrix row per grid point
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix( 
+      matrixSize, // number of rows
+      matrixSize  // number of columns
+   );
+   matrix.setElements( {
+         {  0.0, 1.0 },             // first row gives only two values; the third is presumably taken as zero (confirm)
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         { -1.0, 2.0, -1.0 },
+         {  0.0, 1.0 }              // last row mirrors the first one
+      } );
+   auto view = matrix.getView();
+
+   TNL::Containers::Vector< int, Device > rowLengths;   // constructed empty; handed to getCompressedRowLengths below
+   view.getCompressedRowLengths( rowLengths );
+   std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Compressed row lengths: " << rowLengths << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the getCompressedRowLengths example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
+   laplaceOperatorMatrix< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..c44da63b326aff9e126b11d05fdb001ed5ae0ae6
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e5f20793f81fd3783a1f2bba34141b3759bf121
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp
@@ -0,0 +1,47 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void getRowExample()   // Computes the trace of a 5x5 tridiagonal matrix by reading each row through a matrix view.
+{
+   const int matrixSize = 5;
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   MatrixType matrix (
+      matrixSize,           // number of matrix columns
+      {  { 0.0, 2.0, 1.0 }, // matrix elements, one { below, diagonal, above } triple per row
+         { 0.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 3.0, 2.0, 1.0 },
+         { 0.0, 2.0, 1.0 } } );
+   auto view = matrix.getView();
+
+   /***
+    * The fetch lambda returns the diagonal element of the given row.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      auto row = view.getRow( rowIdx );
+      return row.getValue( 1 ); // local index 1 is the main diagonal (0 = below it, 2 = above it)
+   };
+
+   /***
+    * Compute the matrix trace; it is accumulated as double to match the matrix element type.
+    */
+   double trace = TNL::Algorithms::Reduction< Device >::reduce( view.getRows(), std::plus<>{}, fetch, 0.0 );
+   std::cout << "Matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Matrix trace is: " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the getConstRow example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..c43061eb1c87c6e5f3edd83505f903972210c6fd
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5be0bd761023f7ce596fa36810a775e5312d73ea
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp
@@ -0,0 +1,38 @@
+#include <iostream>
+#include <iomanip>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void getElements()
+{
+   // Prints every entry of a tridiagonal matrix, reading each (row, column) via view.getElement.
+   const int matrixSize( 5 );
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
+      matrixSize,                  // number of matrix columns
+      { {  0.0, 2.0, -1.0 },       // matrix elements definition, one triple per row
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0, -1.0 },
+        { -1.0, 2.0,  0.0 } } );
+   auto view = matrix.getView();
+
+   // Each value is printed right-aligned in a field of width 5, one matrix row per output line.
+   for( int row = 0; row < matrixSize; row++ )
+   {
+      for( int column = 0; column < matrixSize; column++ )
+         std::cout << std::setw( 5 ) << view.getElement( row, column );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] )   // Runs the getElement example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..70bf7122493bab7f4055a4a7128da10857440193
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..641149e050ffe738576b63a73f1cc792457ebf56
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
@@ -0,0 +1,44 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void getRowExample()   // Fills a 5x5 tridiagonal matrix row by row through a matrix view inside ParallelFor.
+{
+   const int matrixSize( 5 );
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   MatrixType matrix(
+      matrixSize,  // number of matrix rows
+      matrixSize  // number of matrix columns
+   );
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {   // the view is captured by value
+      auto row = view.getRow( rowIdx );
+
+      if( rowIdx > 0 )               // row 0 has no element below the diagonal
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < matrixSize - 1 )  // the last row has no element above the diagonal
+         row.setElement( 2, -1.0 );  // elements above the diagonal
+   };
+
+   /***
+    * Set the matrix elements by calling f for every row index in [0, view.getRows()).
+    */
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, view.getRows(), f );
+   std::cout << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the getRow example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..12e2f392ccb0e630aab36a507c4ded137b221cf0
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa345292ce4aa5fc629225d910ca90a77bf6be07
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
@@ -0,0 +1,79 @@
+#include <iostream>
+#include <iomanip>
+#include <functional>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+
+template< typename Device >
+void rowsReduction()
+{
+   /***
+    * Set up the following matrix (dots represent zero matrix elements and the
+    * zeros drawn outside the matrix are padding zeros for memory alignment):
+    * 
+    *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
+    *    | 2  1  3  .  . |   -> { 2, 1, 3 }
+    *    | .  2  1  3  . |   -> { 2, 1, 3 }
+    *    | .  .  2  1  3 |   -> { 2, 1, 3 }
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    * NOTE(review): the last initializer row below is { 2, 1, 3 }, not { 2, 1, 0 } - confirm the trailing 3 is ignored.
+    */
+   TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
+      5,              // number of matrix columns
+      { { 0, 1, 3 },  // matrix elements
+        { 2, 1, 3 }, 
+        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 } } );
+   auto view = matrix.getView();
+
+   /***
+    * Vector that will receive the largest element of each row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a vector view so that the lambdas can capture it by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * The fetch lambda just returns the absolute value of a matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * The reduce lambda returns the maximum of the two given values.
+    */
+   auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * The keep lambda stores the value reduced for a row into the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest value in rows 0 .. view.getRows(); lowest() is presumably the initial value of the reduction (confirm).
+    */
+   view.rowsReduction( 0, view.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the view rowsReduction example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Rows reduction on host:" << std::endl;
+   rowsReduction< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Rows reduction on CUDA device:" << std::endl;
+   rowsReduction< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f749c1ef47ec34830d51be3e3dd39d32347b3c8f
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a424904142b2fcc26f499bd3286ba03c6a58dba2
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp
@@ -0,0 +1,43 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void setElements()   // Fills a square tridiagonal matrix through its view: first from the host, then via ParallelFor on Device.
+{
+   const int matrixSize( 5 );
+   using Matrix = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   Matrix matrix( matrixSize, matrixSize );
+   auto view = matrix.getView();
+   for( int i = 0; i < matrixSize; i++ )   // bound follows matrixSize rather than repeating the literal 5
+      view.setElement( i, i, i );          // host-side write: diagonal entry of row i gets the value i
+
+   std::cout << "Matrix set from the host:" << std::endl;
+   std::cout << matrix << std::endl;
+
+   auto f = [=] __cuda_callable__ ( int i ) mutable {   // the view is captured by value
+      if( i > 0 )                                        // row 0 has no entry left of the diagonal
+         view.setElement( i, i - 1, 1.0 );
+      view.setElement( i, i, -i );
+      if( i < matrixSize - 1 )                           // the last row has no entry right of the diagonal
+         view.setElement( i, i + 1, 1.0 );
+   };
+
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f );   // calls f for i in [0, matrixSize)
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // Runs the view setElement example for the host and, when HAVE_CUDA is defined, for the CUDA device; argc/argv are unused.
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif   // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..9510661c16a7340e6fd1f51958f2c35f280cd177
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h
index a066b461ef018232023873a4787948a4fb3aba60..91db24d0187ea588d111d49beb5370c3e27fe24b 100644
--- a/src/Benchmarks/SpMV/spmv-legacy.h
+++ b/src/Benchmarks/SpMV/spmv-legacy.h
@@ -158,12 +158,12 @@ benchmarkSpMV( Benchmark& benchmark,
 
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( inputFileName ) },
-         { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
+         { "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) },
          { "rows", convertToString( hostMatrix.getRows() ) },
          { "columns", convertToString( hostMatrix.getColumns() ) },
          { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
       } ));
-   const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
+   const int elements = hostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setOperation( datasetSize );
 
diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index 9bcd45d08605c991533bede42836185264e10749..034c4560b65a6c47b5282323a20643c57770cdd6 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -88,9 +88,6 @@ class CSR
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 685f6ef54a6d7ad90ec69e5d45d83d78c0e1f337..0ba034d5b24dbee0c9f7cf798ae9e51a8b5d4ffb 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -170,16 +170,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
    return offsets[ segmentIdx ] + localIdx;
 }
 
-template< typename Device,
-          typename Index,
-          typename IndexAllocator >
-__cuda_callable__
-void
-CSR< Device, Index, IndexAllocator >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index,
           typename IndexAllocator >
diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h
index b01e6c66d7e9ff6c6022e3b88ece898940616fa9..f915a2e38029811c19fbac68c06fa83312101b7e 100644
--- a/src/TNL/Containers/Segments/CSRView.h
+++ b/src/TNL/Containers/Segments/CSRView.h
@@ -87,9 +87,6 @@ class CSRView
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index 7599327d1085f40a3cbedb24b297dc8300f202fa..e6f840d6b20d04526f274ff4418299a8689389be 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -151,15 +151,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
    return offsets[ segmentIdx ] + localIdx;
 }
 
-template< typename Device,
-          typename Index >
-__cuda_callable__
-void
-CSRView< Device, Index >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index >
 __cuda_callable__
diff --git a/src/TNL/Containers/Segments/ElementsOrganization.h b/src/TNL/Containers/Segments/ElementsOrganization.h
index 3c4086cd20ce7463f69f1aba12dff1280e07824f..d930f57f9b8dc18748b861acd743ec0c3672e560 100644
--- a/src/TNL/Containers/Segments/ElementsOrganization.h
+++ b/src/TNL/Containers/Segments/ElementsOrganization.h
@@ -26,7 +26,15 @@ struct DefaultElementsOrganization
          return ColumnMajorOrder;
    };
 };
-
       } // namespace Segments
    }  // namespace Containers
+
+inline String getSerializationType( Containers::Segments::ElementsOrganization Organization )
+{  // `inline` is required: this function is defined in a header and would otherwise be multiply defined at link time.
+   if( Organization == Containers::Segments::RowMajorOrder )
+      return String( "RowMajorOrder" );
+   else  // every value other than RowMajorOrder is reported as column-major
+      return String( "ColumnMajorOrder" );
+}
+
 } // namespace TNL
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
index f5c8490dec78480ca0a62d87a033702b6dbd4e25..8ef79f6d07981ca6a30c2778b06f3e2f0e77ba4e 100644
--- a/src/TNL/Containers/Segments/Ellpack.h
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -85,9 +85,6 @@ class Ellpack
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 922a75765ae3920072712016f498847d57105eae..91e06e1108b52935320ddba634c9aa344df3ba24 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -233,17 +233,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
       return segmentIdx + this->alignedSize * localIdx;
 }
 
-template< typename Device,
-          typename Index,
-          typename IndexAllocator,
-          ElementsOrganization Organization,
-          int Alignment >
-__cuda_callable__ 
-void Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h
index 3559949d890da4dfcd43d5c3fc3852e55bec7175..1d77c7445b3b5f3792daebafca980bfa837929c7 100644
--- a/src/TNL/Containers/Segments/EllpackView.h
+++ b/src/TNL/Containers/Segments/EllpackView.h
@@ -81,9 +81,6 @@ class EllpackView
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index 0195424e99d2f9467565d58841ab90979edf041e..2d1d8090f035b92ec5ba7b226cfe26faddf11524 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -164,15 +164,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
       return segmentIdx + this->alignedSize * localIdx;
 }
 
-template< typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          int Alignment >
-__cuda_callable__ void EllpackView< Device, Index, Organization, Alignment >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h
index c85e457424443604cd4954e6ff9a4513bd9f17ca..a0e5c81b0d7e92f109ccedacf27ef7a4454d5927 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.h
+++ b/src/TNL/Containers/Segments/SlicedEllpack.h
@@ -82,9 +82,6 @@ class SlicedEllpack
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 6a9bc4fe2ba888777c7ddf277e0504e2ce7dc986..a288bf7df800011ba3baeeb552e3e3e518b4494d 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -259,18 +259,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
       return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
 }
 
-template< typename Device,
-          typename Index,
-          typename IndexAllocator,
-          ElementsOrganization Organization,
-          int SliceSize >
-__cuda_callable__
-void
-SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
index 2a0fcb189a5b442831857378fb33c8c9f5f819be..5c6e0eeb26cf7e0c8586be1d7f61c2e39ce37af9 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.h
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -83,9 +83,6 @@ class SlicedEllpackView
       __cuda_callable__
       IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
 
-      __cuda_callable__
-      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
-
       __cuda_callable__
       SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
 
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 7331181efdeaa346d02230dd807d87471619288f..34f24def68187abf603d38da8eadae43b8b3e26c 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -202,17 +202,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
       return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
 }
 
-template< typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          int SliceSize >
-__cuda_callable__
-void
-SlicedEllpackView< Device, Index, Organization, SliceSize >::
-getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
-{
-}
-
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h
index 392bdc6179eac3a28ad9bb9ffbd27a4ca287d418..736cd3bcf77820d36c70fa9f9c02f00cfc753886 100644
--- a/src/TNL/Matrices/DenseMatrix.h
+++ b/src/TNL/Matrices/DenseMatrix.h
@@ -35,7 +35,7 @@ template< typename Real = double,
           typename Index = int,
           ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(),
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
-class DenseMatrix : public Matrix< Real, Device, Index >
+class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
    protected:
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
@@ -88,7 +88,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
 
       /**
-       * \brief Type for accessing matrix row.
+       * \brief Type for accessing matrix rows.
        */
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
@@ -103,17 +103,35 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       using Self = DenseMatrix< _Real, _Device, _Index, _Organization, _RealAllocator >;
 
       /**
-       * \brief Constructor without parameters.
+       * \brief Constructor only with values allocator.
+       * 
+       * \param allocator is used for allocation of matrix elements values.
+       */
+      DenseMatrix( const RealAllocatorType& allocator = RealAllocatorType() );
+
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param matrix is the source matrix
+       */
+      DenseMatrix( const DenseMatrix& matrix ) = default;
+
+      /**
+       * \brief Move constructor.
+       * 
+       * \param matrix is the source matrix
        */
-      DenseMatrix();
+      DenseMatrix( DenseMatrix&& matrix ) = default;
 
       /**
        * \brief Constructor with matrix dimensions.
        * 
        * \param rows is number of matrix rows.
        * \param columns is number of matrix columns.
+       * \param allocator is used for allocation of matrix elements values.
        */
-      DenseMatrix( const IndexType rows, const IndexType columns );
+      DenseMatrix( const IndexType rows, const IndexType columns,
+                   const RealAllocatorType& allocator = RealAllocatorType() );
 
       /**
        * \brief Constructor with 2D initializer list.
@@ -124,14 +142,16 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * 
        * \param data is a initializer list of initializer lists representing
        * list of matrix rows.
+       * \param allocator is used for allocation of matrix elements values.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_Constructor_init_list.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp
        * \par Output
        * \include DenseMatrixExample_Constructor_init_list.out
        */
       template< typename Value >
-      DenseMatrix( std::initializer_list< std::initializer_list< Value > > data );
+      DenseMatrix( std::initializer_list< std::initializer_list< Value > > data,
+                  const RealAllocatorType& allocator = RealAllocatorType() );
 
       /**
        * \brief Returns a modifiable view of the dense matrix.
@@ -189,6 +209,15 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       template< typename Matrix >
       void setLike( const Matrix& matrix );
 
+      /**
+       * \brief This method is only for the compatibility with the sparse matrices.
+       * 
+       * This method does nothing. In debug mode it contains assertions checking
+       * that given rowCapacities are compatible with the current matrix dimensions.
+       */
+      template< typename RowCapacitiesVector >
+      void setRowCapacities( const RowCapacitiesVector& rowCapacities );
+
       /**
        * \brief This method recreates the dense matrix from 2D initializer list.
        * 
@@ -200,22 +229,13 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * list of matrix rows.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_setElements.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp
        * \par Output
        * \include DenseMatrixExample_setElements.out
        */
       template< typename Value >
       void setElements( std::initializer_list< std::initializer_list< Value > > data );
 
-      /**
-       * \brief This method is only for the compatibility with the sparse matrices.
-       * 
-       * This method does nothing. In debug mode it contains assertions checking
-       * that given rowCapacities are compatible with the current matrix dimensions.
-       */
-      template< typename RowCapacitiesVector >
-      void setRowCapacities( const RowCapacitiesVector& rowCapacities );
-
       /**
        * \brief Computes number of non-zeros in each row.
        * 
@@ -223,36 +243,20 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * will be stored.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_getCompressedRowLengths.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp
        * \par Output
        * \include DenseMatrixExample_getCompressedRowLengths.out
        */
       template< typename RowLengthsVector >
       void getCompressedRowLengths( RowLengthsVector& rowLengths ) const;
 
-      /**
-       * \brief Returns number of all matrix elements.
-       * 
-       * This method is here mainly for compatibility with sparse matrices since
-       * the number of all matrix elements is just number of rows times number of
-       * columns.
-       * 
-       * \return number of all matrix elements.
-       * 
-       * \par Example
-       * \include Matrices/DenseMatrixExample_getElementsCount.cpp
-       * \par Output
-       * \include DenseMatrixExample_getElementsCount.out
-       */
-      IndexType getElementsCount() const;
-
       /**
        * \brief Returns number of non-zero matrix elements.
        * 
        * \return number of all non-zero matrix elements.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_getElementsCount.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp
        * \par Output
        * \include DenseMatrixExample_getElementsCount.out
        */
@@ -271,7 +275,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \return RowView for accessing given matrix row.
        *
        * \par Example
-       * \include Matrices/DenseMatrixExample_getConstRow.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
        * \par Output
        * \include DenseMatrixExample_getConstRow.out
        * 
@@ -288,7 +292,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \return RowView for accessing given matrix row.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_getRow.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
        * \par Output
        * \include DenseMatrixExample_getRow.out
        * 
@@ -349,7 +353,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \param value is the value the element will be set to.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_setElement.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp
        * \par Output
        * \include DenseMatrixExample_setElement.out
        */
@@ -373,6 +377,12 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \param value is the value the element will be set to.
        * \param thisElementMultiplicator is multiplicator the original matrix element
        *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp
+       * \par Output
+       * \include DenseMatrixExample_addElement.out
+       * 
        */
       __cuda_callable__
       void addElement( const IndexType row,
@@ -394,6 +404,12 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \param column i a column index of the matrix element.
        * 
        * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp
+       * \par Output
+       * \include DenseMatrixExample_getElement.out
+       * 
        */
       __cuda_callable__
       Real getElement( const IndexType row,
@@ -411,20 +427,47 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
        * \tparam FetchValue is type returned by the Fetch lambda function.
        * 
-       * \param first is an index of the first row the reduction will be performed on.
-       * \param last is an index of the row  after the last row the reduction will be performed on.
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
        * \param fetch is an instance of lambda function for data fetch.
        * \param reduce is an instance of lambda function for reduction.
        * \param keep in an instance of lambda function for storing results.
        * \param zero is zero of given reduction operation also known as idempotent element.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_rowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
        * \par Output
        * \include DenseMatrixExample_rowsReduction.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const;
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const;
 
       /**
        * \brief Method for performing general reduction on ALL matrix rows.
@@ -444,7 +487,32 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_allRowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
        * \par Output
        * \include DenseMatrixExample_allRowsReduction.out
        */
@@ -456,47 +524,47 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * 
        * \tparam Function is type of lambda function that will operate on matrix elements.
        *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, const RealType& value, bool& compute )`.
        *  The column index repeats twice only for compatibility with sparse matrices. 
        *  If the 'compute' variable is set to false the iteration over the row can 
        *  be interrupted.
        * 
-       * \param first is index is the first row to be processed.
-       * \param last is index of the row after the last row to be processed.
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
        * \param function is an instance of the lambda function to be called in each row.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_forRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
        * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for non-constant instances.
        * 
        * \tparam Function is type of lambda function that will operate on matrix elements.
        *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, RealType& value, bool& compute )`.
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value, bool& compute )`.
        *  The column index repeats twice only for compatibility with sparse matrices. 
        *  If the 'compute' variable is set to false the iteration over the row can 
        *  be interrupted.
        * 
-       * \param first is index is the first row to be processed.
-       * \param last is index of the row after the last row to be processed.
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
        * \param function is an instance of the lambda function to be called in each row.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_forRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
        * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
+      void forRows( IndexType begin, IndexType end, Function& function );
 
       /**
-       * \brief This method calls \e forRows for all matrix rows.
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
        * 
        * See \ref DenseMatrix::forRows.
        * 
@@ -504,7 +572,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * \param function  is an instance of the lambda function to be called in each row.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_forAllRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
        * \par Output
        * \include DenseMatrixExample_forAllRows.out
        */
@@ -514,13 +582,13 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       /**
        * \brief This method calls \e forRows for all matrix rows.
        * 
-       * See \ref DenseMatrix::forRows.
+       * See \ref DenseMatrix::forAllRows.
        * 
        * \tparam Function is a type of lambda function that will operate on matrix elements.
        * \param function  is an instance of the lambda function to be called in each row.
        * 
        * \par Example
-       * \include Matrices/DenseMatrixExample_forAllRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
        * \par Output
        * \include DenseMatrixExample_forAllRows.out
        */
@@ -532,7 +600,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
        * 
        * More precisely, it computes:
        * 
-       * outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector.
+       * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector`
        * 
        * \tparam InVector is type of input vector.  It can be \ref Vector,
        *     \ref VectorView, \ref Array, \ref ArraView or similar container.
@@ -579,7 +647,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       /**
-       * \brief Assignment operator for exactly the same type of the dense matrix.
+       * \brief Assignment operator with exactly the same type of the dense matrix.
        * 
        * \param matrix is the right-hand side matrix.
        * \return reference to this matrix.
@@ -587,7 +655,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       DenseMatrix& operator=( const DenseMatrix& matrix );
 
       /**
-       * \brief Assignment operator for other dense matrices.
+       * \brief Assignment operator with other dense matrices.
        * 
        * \param matrix is the right-hand side matrix.
        * \return reference to this matrix.
@@ -597,7 +665,7 @@ class DenseMatrix : public Matrix< Real, Device, Index >
       DenseMatrix& operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHSRealAllocator >& matrix );
 
       /**
-       * \brief Assignment operator for other (sparse) types of matrices.
+       * \brief Assignment operator with other (sparse) types of matrices.
        * 
        * \param matrix is the right-hand side matrix.
        * \return reference to this matrix.
diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp
index 2b2fcc996fdc52b0f6530cf35c95a53c72ea276f..844fe576b4d67ba4b7b6de994b49103b4d57e9b1 100644
--- a/src/TNL/Matrices/DenseMatrix.hpp
+++ b/src/TNL/Matrices/DenseMatrix.hpp
@@ -22,7 +22,9 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-DenseMatrix< Real, Device, Index, Organization, RealAllocator >::DenseMatrix()
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+DenseMatrix( const RealAllocatorType& allocator )
+: Matrix< Real, Device, Index, RealAllocator >( allocator )
 {
 }
 
@@ -32,7 +34,9 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-DenseMatrix( const IndexType rows, const IndexType columns )
+DenseMatrix( const IndexType rows, const IndexType columns,
+             const RealAllocatorType& allocator )
+: Matrix< Real, Device, Index, RealAllocator >( allocator )
 {
    this->setDimensions( rows, columns );
 }
@@ -44,7 +48,9 @@ template< typename Real,
           typename RealAllocator >
    template< typename Value >
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-DenseMatrix( std::initializer_list< std::initializer_list< Value > > data )
+DenseMatrix( std::initializer_list< std::initializer_list< Value > > data,
+             const RealAllocatorType& allocator )
+: Matrix< Real, Device, Index, RealAllocator >( allocator )
 {
    this->setElements( data );
 }
@@ -199,18 +205,6 @@ getCompressedRowLengths( RowLengthsVector& rowLengths ) const
    this->view.getCompressedRowLengths( rowLengths );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          typename RealAllocator >
-Index
-DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-getElementsCount() const
-{
-   return this->getRows() * this->getColumns();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -346,9 +340,35 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -372,9 +392,9 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-forRows( IndexType first, IndexType last, Function& function ) const
+forRows( IndexType begin, IndexType end, Function& function ) const
 {
-   this->view.forRows( first, last, function );
+   this->view.forRows( begin, end, function );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h
index 78fecd0f7dc06572c62cc089d0e24fd180baf916..996c5a6e7e7c7351ad927eb50170b36a06508799 100644
--- a/src/TNL/Matrices/DenseMatrixRowView.h
+++ b/src/TNL/Matrices/DenseMatrixRowView.h
@@ -25,6 +25,11 @@ namespace TNL {
  * \include Matrices/DenseMatrixExample_getRow.cpp
  * \par Output
  * \include DenseMatrixExample_getRow.out
+ * 
+ * \par Example
+ * \include Matrices/DenseMatrixViewExample_getRow.cpp
+ * \par Output
+ * \include DenseMatrixViewExample_getRow.out
  */
 template< typename SegmentView,
           typename ValuesView >
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index e09253cb23136a3a265a8f06dc4181fdefafdb52..b0c50fd56ad98efc28c2ec752c57f87c6142ac7b 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -119,6 +119,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param rows number of matrix rows.
        * \param columns number of matrix columns.
        * \param values is vector view with matrix elements values.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_constructor.out
+
        */
       __cuda_callable__
       DenseMatrixView( const IndexType rows,
@@ -149,72 +155,416 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       __cuda_callable__
       ConstViewType getConstView() const;
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * The string has a form \e `Matrices::DenseMatrix< RealType, [any_device], IndexType, ElementsOrganization >`.
+       * 
+       * \return \e String with the serialization type.
+       */
       static String getSerializationType();
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * See \ref DenseMatrixView::getSerializationType.
+       * 
+       * \return \e String with the serialization type.
+       */
       virtual String getSerializationTypeVirtual() const;
 
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_getCompressedRowLengths.out
+       */
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
-      [[deprecated]]
-      IndexType getRowLength( const IndexType row ) const;
-
-      IndexType getMaxRowLength() const;
-
-      IndexType getElementsCount() const;
+      /**
+       * \brief Returns number of all matrix elements.
+       * 
+       * This method is here mainly for compatibility with sparse matrices since
+       * the number of all matrix elements is just number of rows times number of
+       * columns.
+       * 
+       * \return number of all matrix elements.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_getElementsCount.out
+       */
+      IndexType getAllocatedElementsCount() const;
 
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       * 
+       * \return number of all non-zero matrix elements.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_getElementsCount.out
+       */
       IndexType getNonzeroElementsCount() const;
 
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_getConstRow.out
+       * 
+       * See \ref DenseMatrixRowView.
+       */
       __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
+       * \par Output
+       * \include DenseMatrixExample_getRow.out
+       * 
+       * See \ref DenseMatrixRowView.
+       */
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
-
+      /**
+       * \brief Sets all matrix elements to value \e v.
+       * 
+       * \param v is value all matrix elements will be set to.
+       */
       void setValue( const RealType& v );
 
+      /**
+       * \brief Returns non-constant reference to element at row \e row and column \e column.
+       * 
+       * Since this method returns reference to the element, it cannot be called across
+       * different address spaces. It means that it can be called only from CPU if the matrix
+       * is allocated on CPU or only from GPU kernels if the matrix is allocated on GPU.
+       * 
+       * \param row is a row index of the element.
+       * \param column is a column index of the element.
+       * \return reference to given matrix element.
+       */
       __cuda_callable__
       Real& operator()( const IndexType row,
                         const IndexType column );
 
+      /**
+       * \brief Returns constant reference to element at row \e row and column \e column.
+       * 
+       * Since this method returns reference to the element, it cannot be called across
+       * different address spaces. It means that it can be called only from CPU if the matrix
+       * is allocated on CPU or only from GPU kernels if the matrix is allocated on GPU.
+       * 
+       * \param row is a row index of the element.
+       * \param column is a column index of the element.
+       * \return reference to given matrix element.
+       */
       __cuda_callable__
       const Real& operator()( const IndexType row,
                               const IndexType column ) const;
 
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref DenseMatrix::getRow
+       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp
+       * \par Output
+       * \include DenseMatrixExample_setElement.out
+       */
       __cuda_callable__
       void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref DenseMatrix::getRow
+       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp
+       * \par Output
+       * \include DenseMatrixExample_addElement.out
+       * 
+       */
       __cuda_callable__
       void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref DenseMatrix::getRow
+       * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp
+       * \par Output
+       * \include DenseMatrixExample_getElement.out
+       * 
+       */
       __cuda_callable__
       Real getElement( const IndexType row,
                        const IndexType column ) const;
 
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (called FetchValue in the signatures above).
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_rowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (called FetchValue in the signatures above).
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (called FetchValue in the signatures above).
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (called FetchValue in the signatures above).
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_allRowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for iteration over matrix rows in the range [begin,end) for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The column index repeats twice only for compatibility with sparse matrices. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forRows.out
+       */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
 
+      /**
+       * \brief Method for iteration over matrix rows in the range [begin,end) for non-constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, RealType& value, bool& compute )`.
+       *  The column index repeats twice only for compatibility with sparse matrices. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forRows.out
+       */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
+      void forRows( IndexType begin, IndexType end, Function& function );
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref DenseMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref DenseMatrix::forAllRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function );
 
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. It is number of the matrix rows by default.
+       */
       template< typename InVector, typename OutVector >
       void vectorProduct( const InVector& inVector,
                           OutVector& outVector,
@@ -244,12 +594,37 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
+      /**
+       * \brief Assignment operator with DenseMatrixView.
+       * 
+       * \param matrix is the right-hand side matrix.
+       * \return reference to this matrix.
+       */
       DenseMatrixView& operator=( const DenseMatrixView& matrix );
 
+      /**
+       * \brief Method for saving the matrix view to the file with given filename.
+       * 
+       * The output file can be loaded by \ref DenseMatrix.
+       * 
+       * \param fileName is name of the file.
+       */
       void save( const String& fileName ) const;
 
+      /**
+       * \brief Method for saving the matrix view to a file.
+       * 
+       * The output file can be loaded by \ref DenseMatrix.
+       * 
+       * \param file is the file where the matrix view will be saved.
+       */
       void save( File& file ) const;
 
+      /**
+       * \brief Method for printing the matrix to output stream.
+       * 
+       * \param str is the output stream.
+       */
       void print( std::ostream& str ) const;
 
    protected:
@@ -258,9 +633,6 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
       IndexType getElementIndex( const IndexType row,
                                  const IndexType column ) const;
 
-      //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-      //friend class DenseDeviceDependentCode< DeviceType >;
-
       SegmentsViewType segments;
 };
 
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 2ba34e549ddf2219a0b723699f62afd7451b67f8..b96a8475b12efb0ba16acec67944e1a950a32065 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -40,6 +40,13 @@ DenseMatrixView( const IndexType rows,
                  const ValuesViewType& values )
  : MatrixView< Real, Device, Index >( rows, columns, values )
 {
+#ifdef __CUDA_ARCH__
+   TNL_ASSERT_EQ( values.getSize(), this->getAllocatedElementsCount(), "Number of matrix elements does not agree with matrix dimensions." );
+#else
+   if( values.getSize() != this->getAllocatedElementsCount() )
+      throw( std::logic_error( "Number of matrix elements does not agree with matrix dimensions." ) );
+#endif
+
    SegmentsType a( rows, columns );
    segments = a.getView();
 }
@@ -82,9 +89,9 @@ DenseMatrixView< Real, Device, Index, Organization >::
 getSerializationType()
 {
    return String( "Matrices::DenseMatrix< " ) +
-          TNL::getSerializationType< RealType >() + ", [any_device], " +
-          TNL::getSerializationType< IndexType >() + ", " +
-          ( Organization ? "true" : "false" ) + ", [any_allocator] >";
+      TNL::getSerializationType< RealType >() + ", [any_device], " +
+      TNL::getSerializationType< IndexType >() + ", " +
+      TNL::getSerializationType( Organization ) + " >";
 }
 
 template< typename Real,
@@ -125,29 +132,7 @@ template< typename Real,
           ElementsOrganization Organization >
 Index
 DenseMatrixView< Real, Device, Index, Organization >::
-getRowLength( const IndexType row ) const
-{
-   return this->getColumns();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-DenseMatrixView< Real, Device, Index, Organization >::
-getMaxRowLength() const
-{
-   return this->getColumns();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-DenseMatrixView< Real, Device, Index, Organization >::
-getElementsCount() const
+getAllocatedElementsCount() const
 {
    return this->getRows() * this->getColumns();
 }
@@ -286,14 +271,43 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+{
+   auto values_view = this->values.getView();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
    const auto values_view = this->values.getConstView();
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
          return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
       return zero;
    };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+   this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -315,14 +329,13 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forRows( IndexType first, IndexType last, Function& function ) const
+forRows( IndexType begin, IndexType end, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
       function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute );
    };
-   this->segments.forSegments( first, last, f );
-
+   this->segments.forSegments( begin, end, f );
 }
 
 template< typename Real,
@@ -332,14 +345,13 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forRows( IndexType first, IndexType last, Function& function )
+forRows( IndexType begin, IndexType end, Function& function )
 {
    auto values_view = this->values.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
       function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute );
    };
-   this->segments.forSegments( first, last, f );
-
+   this->segments.forSegments( begin, end, f );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/DenseRow.h b/src/TNL/Matrices/DenseRow.h
deleted file mode 100644
index a1d6d2124b1df9d7da0dcc98b26aa83431bfeaa9..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/DenseRow.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/***************************************************************************
-                          DenseRow.h  -  description
-                             -------------------
-    begin                : Dec 24, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-class DenseRow
-{
-   public:
-
-      __cuda_callable__
-      DenseRow();
-
-      __cuda_callable__
-      DenseRow( Real* values,
-                         const Index columns,
-                         const Index step );
-
-      __cuda_callable__
-      void bind( Real* values,
-                 const Index columns,
-                 const Index step );
-
-      __cuda_callable__
-      void setElement( const Index& elementIndex,
-                       const Index& column,
-                       const Real& value );
-
-   protected:
-
-      Real* values;
-
-      Index columns, step;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/DenseRow_impl.h>
-
diff --git a/src/TNL/Matrices/DenseRow_impl.h b/src/TNL/Matrices/DenseRow_impl.h
deleted file mode 100644
index 7b1bac1a5bdc5074b5f22b4d3b5d86046e605011..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/DenseRow_impl.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/***************************************************************************
-                          DenseRow_impl.h  -  description
-                             -------------------
-    begin                : Dec 24, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-__cuda_callable__
-DenseRow< Real, Index >::
-DenseRow()
-: values( 0 ),
-  columns( 0 ),
-  step( 0 )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-DenseRow< Real, Index >::
-DenseRow( Real* values,
-                   const Index columns,
-                   const Index step )
-: values( values ),
-  columns( columns ),
-  step( step )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-DenseRow< Real, Index >::
-bind( Real* values,
-      const Index columns,
-      const Index step )
-{
-   this->values = values;
-   this->columns = columns;
-   this->step = step;
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-DenseRow< Real, Index >::
-setElement( const Index& elementIndex,
-            const Index& column,
-            const Real& value )
-{
-   TNL_ASSERT( this->values, );
-   TNL_ASSERT( this->step > 0,);
-   TNL_ASSERT( column >= 0 && column < this->columns,
-              std::cerr << "column = " << column << " this->columns = " << this->columns );
-
-   this->values[ column * this->step ] = value;
-}
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h
index 4785a6790978b8229ca1207ee6f9cd7b71a32103..83c1bd682c719f992ca06b6f3f2fdf17942ab73d 100644
--- a/src/TNL/Matrices/DistributedSpMV.h
+++ b/src/TNL/Matrices/DistributedSpMV.h
@@ -218,7 +218,7 @@ public:
 
 protected:
    // communication pattern
-   Matrices::DenseMatrix< IndexType, Devices::Host, int, Containers::Segments::RowMajorOrder, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds;
+   Matrices::DenseMatrix< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds;
 
    // span of rows with only block-diagonal entries
    std::pair< IndexType, IndexType > localOnlySpan;
diff --git a/src/TNL/Matrices/LambdaMatrix.h b/src/TNL/Matrices/LambdaMatrix.h
index 5f3ecdfb36bc447f44578c4c0493ae635b1e7cd4..1692510e70ecfeb0aa3e5365d7535db47c221599 100644
--- a/src/TNL/Matrices/LambdaMatrix.h
+++ b/src/TNL/Matrices/LambdaMatrix.h
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <TNL/String.h>
-#include <TNL/Devices/AnyDevice.h>
+#include <TNL/Devices/Host.h>
 
 namespace TNL {
 namespace Matrices {
@@ -19,87 +19,292 @@ namespace Matrices {
 /**
  * \brief "Matrix-free" matrix based on lambda functions.
  * 
- * \tparam MatrixElementsLambda is a lambda function returning matrix elements
- *    values and positions.
- * \tparam CompressedRowLengthsLambda is a lambda function returning a number
- *    of non-zero elements in each row.
+ * The elements of this matrix are not stored explicitly in memory but
+ * implicitly in the form of lambda functions.
+ * 
+ * \tparam MatrixElementsLambda is a lambda function returning matrix elements values and positions.
+ * 
+ *    It has the following form:
+ * 
+ *   `matrixElements( IndexType rows, IndexType columns, IndexType row, IndexType localIdx, IndexType& elementColumn, RealType& elementValue )`
+ * 
+ *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns, \e row is the index of matrix row being queried,
+ *    \e localIdx is the rank of the non-zero element in given row, \e elementColumn is a column index of the matrix element computed by
+ *    this lambda and \e elementValue is a value of the matrix element computed by this lambda.
+ * \tparam CompressedRowLengthsLambda is a lambda function returning a number of non-zero elements in each row.
+ * 
+ *    It has the following form:
+ * 
+ *    `rowLengths( IndexType rows, IndexType columns, IndexType row ) -> IndexType`
+ * 
+ *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns and \e row is an index of the row being queried.
+ *
  * \tparam Real is a type of matrix elements values.
- * \tparam Device is a device on which the lambda functions can evaluated. 
- *    Devices::AnyDevice can be used for lambdas with no restriction.
+ * \tparam Device is a device on which the lambda functions will be evaluated. 
  * \ẗparam Index is a type used for indexing.
  */
 template< typename MatrixElementsLambda,
           typename CompressedRowLengthsLambda,
           typename Real = double,
-          typename Device = Devices::AnyDevice,
+          typename Device = Devices::Host,
           typename Index = int >
 class LambdaMatrix
 {
    public:
-      static constexpr bool isSymmetric() { return false; };
-      static constexpr bool isBinary() { return false; };
 
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
       using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
       using IndexType = Index;
 
+      static constexpr bool isSymmetric() { return false; };
+      static constexpr bool isBinary() { return false; };
+
+      /**
+       * \brief Constructor with lambda functions defining the matrix elements.
+       * 
+       * Note: It might be difficult to express the types of the lambdas. For easier creation of
+       * \e LambdaMatrix you may use \ref LambdaMatrixFactory.
+       * 
+       * \param matrixElements is a lambda function giving matrix elements position and value.
+       * \param compressedRowLentghs is a lambda function returning how many non-zero matrix elements are in given row.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
+       * \par Output
+       * \include LambdaMatrixExample_Constructor.out
+       */
       LambdaMatrix( MatrixElementsLambda& matrixElements,
                     CompressedRowLengthsLambda& compressedRowLentghs );
 
+      /**
+       * \brief Constructor with matrix dimensions and lambda functions defining the matrix elements.
+       * 
+       * Note: It might be difficult to express the types of the lambdas. For easier creation of
+       * \e LambdaMatrix you may use \ref LambdaMatrixFactory.
+       * 
+       * \param rows is a number of the matrix rows.
+       * \param columns is a number of the matrix columns.
+       * \param matrixElements is a lambda function giving matrix elements position and value.
+       * \param compressedRowLentghs is a lambda function returning how many non-zero matrix elements are in given row.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
+       * \par Output
+       * \include LambdaMatrixExample_Constructor.out
+       */
       LambdaMatrix( const IndexType& rows,
                     const IndexType& columns,
                     MatrixElementsLambda& matrixElements,
                     CompressedRowLengthsLambda& compressedRowLentghs );
 
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param matrix is input matrix.
+       */
+      LambdaMatrix( const LambdaMatrix& matrix ) = default;
+
+      /**
+       * \brief Move constructor.
+       * 
+       * \param matrix is input matrix.
+       */
+      LambdaMatrix( LambdaMatrix&& matrix ) = default;
+
+      /**
+       * \brief Set number of rows and columns of this matrix.
+       * 
+       * \param rows is the number of matrix rows.
+       * \param columns is the number of matrix columns.
+       */
       void setDimensions( const IndexType& rows,
                          const IndexType& columns );
 
+      /**
+       * \brief Returns a number of matrix rows.
+       * 
+       * \return number of matrix rows.
+       */
       __cuda_callable__
       IndexType getRows() const;
 
+      /**
+       * \brief Returns a number of matrix columns.
+       * 
+       * \return number of matrix columns.
+       */
       __cuda_callable__
       IndexType getColumns() const;
 
-      template< typename Vector >
-      void getCompressedRowLengths( Vector& rowLengths ) const;
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include LambdaMatrixExample_getCompressedRowLengths.out
+       */
+      template< typename RowLentghsVector >
+      void getCompressedRowLengths( RowLentghsVector& rowLengths ) const;
+
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       * 
+       * \return number of all non-zero matrix elements.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_getElementsCount.cpp
+       * \par Output
+       * \include LambdaMatrixExample_getElementsCount.out
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       */
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-            template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;
-
-      /***
-       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include LambdaMatrixExample_rowsReduction.out
        */
-      template< typename InVector,
-                typename OutVector >
-      void vectorProduct( const InVector& inVector,
-                          OutVector& outVector,
-                          const RealType& matrixMultiplicator = 1.0,
-                          const RealType& outVectorMultiplicator = 0.0 ) const;
-
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include LambdaMatrixExample_allRowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for iteration over all matrix rows for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have the form
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  Here \e localIdx is the rank of the non-zero element in its row and \e columnIdx is its column index.
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_forRows.out
+       */
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function ) const;
 
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
-
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       * 
+       * See \ref LambdaMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function ) const;
 
-      template< typename Function >
-      void forAllRows( Function& function );
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. If it is zero (the default), the number of matrix rows is used.
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& outVectorMultiplicator = 0.0,
+                          const IndexType begin = 0,
+                          IndexType end = 0 ) const;
+
 
       template< typename Vector1, typename Vector2 >
       void performSORIteration( const Vector1& b,
@@ -107,6 +312,11 @@ class LambdaMatrix
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
+      /**
+       * \brief Method for printing the matrix to output stream.
+       * 
+       * \param str is the output stream.
+       */
       void print( std::ostream& str ) const;
 
    protected:
@@ -118,21 +328,49 @@ class LambdaMatrix
       CompressedRowLengthsLambda compressedRowLengthsLambda;
 };
 
+/**
+ * \brief Insertion operator for lambda matrix and output stream.
+ * 
+ * \param str is the output stream.
+ * \param matrix is the lambda matrix.
+ * \return reference to the stream.
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+std::ostream& operator<< ( std::ostream& str, const LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >& matrix );
 
 /**
  * \brief Helper class for creating instances of LambdaMatrix.
- * @param matrixElementsLambda
- * @param compressedRowLengthsLambda
- * @return 
+ * 
+ * See \ref LambdaMatrix.
+ * 
+ * \param matrixElementsLambda
+ * \param compressedRowLengthsLambda
  */
 template< typename Real = double,
-          typename Device = Devices::AnyDevice,
+          typename Device = Devices::Host,
           typename Index = int >
 struct LambdaMatrixFactory
 {
    using RealType = Real;
    using IndexType = Index;
-   
+
+   /**
+    * \brief Creates lambda matrix with given lambda functions.
+    * 
+    * \param matrixElementsLambda is a lambda function evaluating matrix elements.
+    * \param compressedRowLengthsLambda is a lambda function returning number of
+    *    non-zero matrix elements in given \e row.
+    * \return instance of LambdaMatrix.
+    * 
+    * \par Example
+    * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
+    * \par Output
+    * \include LambdaMatrixExample_Constructor.out
+    */
    template< typename MatrixElementsLambda,
              typename CompressedRowLengthsLambda >
    static auto create( MatrixElementsLambda& matrixElementsLambda,
@@ -143,7 +381,22 @@ struct LambdaMatrixFactory
          matrixElementsLambda,
          compressedRowLengthsLambda );
    };
-   
+
+   /**
+    * \brief Creates lambda matrix with given dimensions and lambda functions.
+    * 
+    * \param rows is number of matrix rows.
+    * \param columns is number of matrix columns.
+    * \param matrixElementsLambda is a lambda function evaluating matrix elements.
+    * \param compressedRowLengthsLambda is a lambda function returning number of
+    *    non-zero matrix elements in given \e row.
+    * \return instance of LambdaMatrix.
+    * 
+    * \par Example
+    * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp
+    * \par Output
+    * \include LambdaMatrixExample_Constructor.out
+    */
    template< typename MatrixElementsLambda,
              typename CompressedRowLengthsLambda >
    static auto create( const IndexType& rows,
@@ -153,6 +406,7 @@ struct LambdaMatrixFactory
    -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >
    {
       return LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >(
+         rows, columns,
          matrixElementsLambda,
          compressedRowLengthsLambda );
    };
diff --git a/src/TNL/Matrices/LambdaMatrix.hpp b/src/TNL/Matrices/LambdaMatrix.hpp
index c992bd575bd7f2e09bbd50ded60191a90ceca297..7e606d1e7f49991eaf7f09cb05c618c289f817eb 100644
--- a/src/TNL/Matrices/LambdaMatrix.hpp
+++ b/src/TNL/Matrices/LambdaMatrix.hpp
@@ -10,10 +10,13 @@
 
 #pragma once
 
+#include <iomanip>
+#include <functional>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Matrices/LambdaMatrix.h>
 #include <TNL/Algorithms/ParallelFor.h>
 #include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Matrices/details/SparseMatrix.h>
 
 namespace TNL {
 namespace Matrices {
@@ -94,33 +97,16 @@ void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
-   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
-   
-   rowLengths.setSize( this->getRows() );
-   const IndexType rows = this->getRows();
-   const IndexType columns = this->getColumns();
-   auto rowLengthsView = rowLengths.getView();
-   auto compressedRowLengths = this->compressedRowLengthsLambda;
-
-   if( std::is_same< typename Vector::DeviceType, Device_ >::value )
-      Algorithms::ParallelFor< Device_ >::exec(
-         ( IndexType ) 0,
-         this->getRows(),
-         [=] __cuda_callable__ ( const IndexType row ) mutable {
-            rowLengthsView[ row ] = compressedRowLengths( rows, columns, row );
-         } );
-   else
-   {
-      Containers::Vector< IndexType, Device_, IndexType > aux( this->getRows() );
-      auto auxView = aux.getView();
-      Algorithms::ParallelFor< Device_ >::exec(
-         ( IndexType ) 0,
-         this->getRows(),
-         [=] __cuda_callable__ ( const IndexType row ) mutable {
-            auxView[ row ] = compressedRowLengths( rows, columns, row );
-         } );
-      rowLengths = aux;
-   }
+   details::set_size_if_resizable( rowLengths, this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, std::plus<>{}, keep, 0 );
 }
 
 template< typename MatrixElementsLambda,
@@ -130,9 +116,9 @@ template< typename MatrixElementsLambda,
           typename Index >
 Index
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
-   Containers::Vector< IndexType, typename Devices::PickDevice< DeviceType >::DeviceType, IndexType > rowLengthsVector;
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLengthsVector;
    this->getCompressedRowLengths( rowLengthsVector );
    return sum( rowLengthsVector );
 }
@@ -147,8 +133,7 @@ LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, In
 getElement( const IndexType row,
             const IndexType column ) const
 {
-   using Device_ = typename Devices::PickDevice< Devices::Host >::DeviceType;
-   Containers::Array< RealType, Device_ > value( 1 );
+   Containers::Array< RealType, DeviceType > value( 1 );
    auto valueView = value.getView();
    auto rowLengths = this->compressedRowLengthsLambda;
    auto matrixElements = this->matrixElementsLambda;
@@ -169,25 +154,10 @@ getElement( const IndexType row,
          }
       }
    };
-   Algorithms::ParallelFor< Device_ >::exec( row, row + 1, getValue );
+   Algorithms::ParallelFor< DeviceType >::exec( row, row + 1, getValue );
    return valueView.getElement( 0 );
 }
 
-template< typename MatrixElementsLambda,
-          typename CompressedRowLengthsLambda,
-          typename Real,
-          typename Device,
-          typename Index >
-      template< typename Vector >
-__cuda_callable__
-typename Vector::RealType
-LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-rowVectorProduct( const IndexType row,
-                  const Vector& vector ) const
-{
-   
-}
-
 template< typename MatrixElementsLambda,
           typename CompressedRowLengthsLambda,
           typename Real,
@@ -200,20 +170,22 @@ LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, In
 vectorProduct( const InVector& inVector,
                OutVector& outVector,
                const RealType& matrixMultiplicator,
-               const RealType& outVectorMultiplicator ) const
+               const RealType& outVectorMultiplicator,
+               const IndexType begin,
+               IndexType end ) const
 {
    TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
 
    const auto inVectorView = inVector.getConstView();
    auto outVectorView = outVector.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType columnIdx, const RealType& value ) mutable -> RealType {
       if( value == 0.0 )
          return 0.0;
       return value * inVectorView[ columnIdx ];
    };
-   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
-      sum += value;
+   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) -> RealType {
+      return sum + value;
    };
    auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       if( outVectorMultiplicator == 0.0 )
@@ -221,7 +193,9 @@ vectorProduct( const InVector& inVector,
       else
          outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
    };
-   this->allRowsReduction( fetch, reduce, keep, 0.0 );
+   if( ! end )
+      end = this->getRows();
+   this->rowsReduction( begin, end, fetch, reduce, keep, 0.0 );
 }
 
 template< typename MatrixElementsLambda,
@@ -232,10 +206,9 @@ template< typename MatrixElementsLambda,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   using FetchType = decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) );
-   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
+   using FetchType = decltype( fetch( IndexType(), IndexType(), RealType() ) );
 
    const IndexType rows = this->getRows();
    const IndexType columns = this->getColumns();
@@ -251,12 +224,12 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
         matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue );
         FetchType fetchValue( zero );
         if( elementValue != 0.0 )
-            fetchValue = fetch( rowIdx, localIdx, elementColumn, elementValue );
-        reduce( result, fetchValue );
+            fetchValue = fetch( rowIdx, elementColumn, elementValue );
+        result = reduce( result, fetchValue );
       }
       keep( rowIdx, result );
    };
-   Algorithms::ParallelFor< Device_ >::exec( first, last, processRow );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, processRow );
 }
 
 template< typename MatrixElementsLambda,
@@ -267,7 +240,7 @@ template< typename MatrixElementsLambda,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
@@ -282,9 +255,6 @@ void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-   using FetchType = decltype( fetch( IndexType(), IndexType(), RealType(), IndexType() ) );
-   using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType;
-
    const IndexType rows = this->getRows();
    const IndexType columns = this->getColumns();
    auto rowLengths = this->compressedRowLengthsLambda;
@@ -301,7 +271,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
             function( rowIdx, localIdx, elementColumn, elementValue, compute );
       }
    };
-   Algorithms::ParallelFor< Device_ >::exec( first, last, processRow );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, processRow );
 }
 
 template< typename MatrixElementsLambda,
@@ -312,9 +282,25 @@ template< typename MatrixElementsLambda,
    template< typename Function >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-forRows( IndexType first, IndexType last, Function& function )
+forAllRows( Function& function ) const
 {
-   this->forRows( 0, this->getRows(), function );
+   const IndexType rows = this->getRows();
+   const IndexType columns = this->getColumns();
+   auto rowLengths = this->compressedRowLengthsLambda;
+   auto matrixElements = this->matrixElementsLambda;
+   auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      const IndexType rowLength = rowLengths( rows, columns, rowIdx );
+      bool compute( true );
+      for( IndexType localIdx = 0; localIdx < rowLength && compute; localIdx++ )
+      {
+        IndexType elementColumn( 0 );
+        RealType elementValue( 0.0 );
+        matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue );
+        if( elementValue != 0.0 )
+            function( rowIdx, localIdx, elementColumn, elementValue, compute );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( 0, this->getRows(), processRow );
 }
 
 template< typename MatrixElementsLambda,
@@ -347,13 +333,35 @@ print( std::ostream& str ) const
       str <<"Row: " << row << " -> ";
       for( IndexType column = 0; column < this->getColumns(); column++ )
       {
-         auto value = this->getElement( row, column );
-         if( value != ( RealType ) 0 )
-            str << " Col:" << column << "->" << value << "\t";
+         RealType value = this->getElement( row, column );
+         if( value )
+         {
+            std::stringstream str_;
+            str_ << std::setw( 4 ) << std::right << column << ":" << std::setw( 4 ) << std::left << value;
+            str << std::setw( 10 ) << str_.str();
+         }
       }
       str << std::endl;
    }
 }
 
+/**
+ * \brief Insertion operator for lambda matrix and output stream.
+ *
+ * \param str is the output stream.
+ * \param matrix is the lambda matrix.
+ * \return reference to the stream.
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+std::ostream& operator<< ( std::ostream& str, const LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >& matrix )
+{
+   matrix.print( str );
+   return str;
+}
+
 } //namespace Matrices
 } //namespace TNL
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 4b954d314ab7775ec00b65ac31bea4c18684c4ff..210332bcd77603ee01000bf59e076460b123a34a 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -58,7 +58,7 @@ public:
 
    IndexType getAllocatedElementsCount() const;
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+   IndexType getNonzeroElementsCount() const;
 
    void reset();
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index ce5f52274ec1134f30a52b64bf1572b7d757dc84..adacaee57f961db424cb85ef548dc7b1189f24fd 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -79,7 +79,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
+Index Matrix< Real, Device, Index, RealAllocator >::getNonzeroElementsCount() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h
index 1d825a769d1d2e78b944c7bf464353585d3188b9..65c2aca63a798c920da05958ab293054c0d1f7f2 100644
--- a/src/TNL/Matrices/MatrixInfo.h
+++ b/src/TNL/Matrices/MatrixInfo.h
@@ -34,6 +34,8 @@ template< typename Matrix >
 struct MatrixInfo
 {};
 
+/// This prevents the following specializations from appearing in Doxygen documentation.
+/// \cond HIDDEN_CLASS
 template< typename Real,
           typename Device,
           typename Index,
@@ -151,5 +153,6 @@ struct MatrixInfo< Legacy::SlicedEllpack< Real, Device, Index, SliceSize> >
    static String getFormat() { return "SlicedEllpack Legacy"; };
 };
 
+/// \endcond
 } //namespace Matrices
 } //namespace TNL
diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h
index ae0606678f1b9167b10fd4b9e4868847c41c9b99..c9960982ac79086a3b1fee2ea08dc438e1359d49 100644
--- a/src/TNL/Matrices/MatrixReader.h
+++ b/src/TNL/Matrices/MatrixReader.h
@@ -17,9 +17,12 @@
 namespace TNL {
 namespace Matrices {
 
+/// This prevents the following helper class from appearing in Doxygen documentation.
+/// \cond HIDDEN_CLASS
 template< typename Device >
 class MatrixReaderDeviceDependentCode
 {};
+/// \endcond
 
 template< typename Matrix >
 class MatrixReader
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index df2c05c63237c23e0fa26ba331ac8a3d2da03b8c..862d4a285cc3565f9c37309336fa333523d6c111 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -357,6 +357,8 @@ void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
    value = ( RealType ) atof( parsedLine[ 2 ].getString() );
 }
 
+/// This prevents the following specializations from appearing in Doxygen documentation.
+/// \cond HIDDEN_CLASS
 template<>
 class MatrixReaderDeviceDependentCode< Devices::Host >
 {
@@ -392,6 +394,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
       MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader );
    }
 };
+/// \endcond
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h
index 8d4cfe7ba4d592ca117005127660f800df287906..ad1faaa8ba8e665fcb81b6b37ecaa594b4df2608 100644
--- a/src/TNL/Matrices/MatrixType.h
+++ b/src/TNL/Matrices/MatrixType.h
@@ -11,8 +11,13 @@
 #pragma once
 
 namespace TNL {
-   namespace Matrices {
+namespace Matrices {
 
+/**
+ * \brief Structure for specifying type of sparse matrix.
+ * 
+ * It is used for specification of \ref SparseMatrix type.
+ */
 template< bool Symmetric,
           bool Binary >
 struct MatrixType
@@ -21,43 +26,65 @@ struct MatrixType
 
    static constexpr bool isBinary() { return Binary; }
 
+   static String getSerializationType() { // returns "General", "Symmetric", "Binary" or "SymmetricBinary"
+      String type;
+      if( ! isBinary() && ! isSymmetric() )
+         type = "General";
+      else
+      {
+         if( isSymmetric() ) type = "Symmetric"; // must call the predicate: a bare function name is always true
+         if( isBinary() ) type += "Binary";      // appended, so symmetric + binary yields "SymmetricBinary"
+      }
+      return type;
+   }
 };
 
-struct GeneralMatrix
-{
-   static constexpr bool isSymmetric() { return false; }
-
-   static constexpr bool isBinary() { return false; }
-};
-
-struct SymmetricMatrix
-{
-   static constexpr bool isSymmetric() { return true; }
-
-   static constexpr bool isBinary() { return false; }
-};
-
-struct BinaryMatrix
-{
-   static constexpr bool isSymmetric() { return false; }
-
-   static constexpr bool isBinary() { return true; }
-};
-
-struct BinarySymmetricMatrix
-{
-   static constexpr bool isSymmetric() { return false; }
-
-   static constexpr bool isBinary() { return true; }
-};
-
-struct SymmetricBinaryMatrix
-{
-   static constexpr bool isSymmetric() { return false; }
-
-   static constexpr bool isBinary() { return true; }
-};
-
-
-   } //namespace Matrices
-} //namespace TNL
\ No newline at end of file
+/**
+ * \brief General non-symmetric matrix type.
+ * 
+ * It is used for specification of \ref SparseMatrix type.
+ */
+struct GeneralMatrix : MatrixType< false, false > {};
+
+/**
+ * \brief Symmetric matrix type.
+ * 
+ * Symmetric matrix stores only lower part of the matrix and its diagonal. The
+ * upper part is reconstructed on the fly.
+ * It is used for specification of \ref SparseMatrix type.
+ */
+struct SymmetricMatrix : MatrixType< true, false > {};
+
+/**
+ * \brief Binary matrix type.
+ *
+ * Binary matrix does not explicitly store values of matrix elements and thus
+ * it reduces memory consumption.
+ * It is used for specification of \ref SparseMatrix type.
+ */
+struct BinaryMatrix : MatrixType< false, true > {};
+
+/**
+ * \brief Symmetric and binary matrix type.
+ *
+ * Symmetric matrix stores only lower part of the matrix and its diagonal. The
+ * upper part is reconstructed on the fly.
+ * Binary matrix does not explicitly store values of matrix elements and thus
+ * it reduces memory consumption.
+ * It is used for specification of \ref SparseMatrix type.
+ */
+struct BinarySymmetricMatrix : MatrixType< true, true > {};
+
+/**
+ * \brief Symmetric and binary matrix type.
+ *
+ * Symmetric matrix stores only lower part of the matrix and its diagonal. The
+ * upper part is reconstructed on the fly.
+ * Binary matrix does not explicitly store values of matrix elements and thus
+ * it reduces memory consumption.
+ * It is used for specification of \ref SparseMatrix type.
+ */
+struct SymmetricBinaryMatrix : MatrixType< true, true > {};
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 76a3948a98792388120097cc7e20190ba58c95e5..9c23e539f02cfb412db82b58d771c5923c57cf13 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -49,9 +49,12 @@ public:
    __cuda_callable__
    MatrixView( const MatrixView& view ) = default;
 
+   __cuda_callable__
+   MatrixView( MatrixView&& view ) = default;
+
    IndexType getAllocatedElementsCount() const;
 
-   virtual IndexType getNumberOfNonzeroMatrixElements() const;
+   virtual IndexType getNonzeroElementsCount() const;
 
    __cuda_callable__
    IndexType getRows() const;
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
index b2b181e4c4671607728bfb9f37935a23fe258a30..e10874201eb587110238570b5978b42cdc0cf7f2 100644
--- a/src/TNL/Matrices/MatrixView.hpp
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -57,7 +57,7 @@ template< typename Real,
           typename Index >
 Index
 MatrixView< Real, Device, Index >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
deleted file mode 100644
index 9823a7b6198191cb601eaf8ea5403254fc5ff88a..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/Multidiagonal.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/***************************************************************************
-                          Multidiagonal.h  -  description
-                             -------------------
-    begin                : Oct 13, 2011
-    copyright            : (C) 2011 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Matrices/Matrix.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
-#include <TNL/Containers/Segments/Ellpack.h>
-#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
-#include <TNL/Matrices/MultidiagonalMatrixView.h>
-
-namespace TNL {
-namespace Matrices {
-
-template< typename Real = double,
-          typename Device = Devices::Host,
-          typename Index = int,
-          ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(),
-          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
-          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
-class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >
-{
-   public:
-      using RealType = Real;
-      using DeviceType = Device;
-      using IndexType = Index;
-      using RealAllocatorType = RealAllocator;
-      using IndexAllocatorType = IndexAllocator;
-      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
-      using ValuesViewType = typename ValuesVectorType::ViewType;
-      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >;
-      using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
-      using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType;
-      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
-      using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >;
-      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
-
-      using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
-      using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType;
-
-
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-
-      template< typename _Real = Real,
-                typename _Device = Device,
-                typename _Index = Index >
-      using Self = Multidiagonal< _Real, _Device, _Index >;
-
-      static constexpr ElementsOrganization getOrganization() { return Organization; };
-
-      Multidiagonal();
-
-      Multidiagonal( const IndexType rows,
-                     const IndexType columns );
-
-      template< typename Vector >
-      Multidiagonal( const IndexType rows,
-                     const IndexType columns,
-                     const Vector& diagonalsShifts );
-
-      ViewType getView() const; // TODO: remove const
-
-      //ConstViewType getConstView() const;
-
-      static String getSerializationType();
-
-      virtual String getSerializationTypeVirtual() const;
-
-      template< typename Vector >
-      void setDimensions( const IndexType rows,
-                          const IndexType columns,
-                          const Vector&  diagonalsShifts );
-
-      //template< typename Vector >
-      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
-
-      const IndexType& getDiagonalsCount() const;
-
-      const DiagonalsShiftsType& getDiagonalsShifts() const;
-
-      template< typename Vector >
-      void getCompressedRowLengths( Vector& rowLengths ) const;
-
-      IndexType getNonemptyRowsCount() const;
-
-      [[deprecated]]
-      IndexType getRowLength( const IndexType row ) const;
-
-      IndexType getMaxRowLength() const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      void setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m );
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
-      void reset();
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      bool operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      bool operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
-
-      __cuda_callable__
-      RowView getRow( const IndexType& rowIdx );
-
-      __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
-
-      void setValue( const RealType& v );
-
-      void setElement( const IndexType row,
-                       const IndexType column,
-                       const RealType& value );
-
-      void addElement( const IndexType row,
-                       const IndexType column,
-                       const RealType& value,
-                       const RealType& thisElementMultiplicator = 1.0 );
-
-      RealType getElement( const IndexType row,
-                           const IndexType column ) const;
-
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
-
-      template< typename Function >
-      void forAllRows( Function& function ) const;
-
-      template< typename Function >
-      void forAllRows( Function& function );
-
-      template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;
-
-      template< typename InVector,
-                typename OutVector >
-      void vectorProduct( const InVector& inVector,
-                          OutVector& outVector ) const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      void addMatrix( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
-                      const RealType& matrixMultiplicator = 1.0,
-                      const RealType& thisMatrixMultiplicator = 1.0 );
-
-      template< typename Real2, typename Index2 >
-      void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
-                             const RealType& matrixMultiplicator = 1.0 );
-
-      template< typename Vector1, typename Vector2 >
-      __cuda_callable__
-      void performSORIteration( const Vector1& b,
-                                const IndexType row,
-                                Vector2& x,
-                                const RealType& omega = 1.0 ) const;
-
-      // copy assignment
-      Multidiagonal& operator=( const Multidiagonal& matrix );
-
-      // cross-device copy assignment
-      template< typename Real_,
-                typename Device_,
-                typename Index_,
-                ElementsOrganization Organization_,
-                typename RealAllocator_,
-                typename IndexAllocator_ >
-      Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix );
-
-      void save( File& file ) const;
-
-      void load( File& file );
-
-      void save( const String& fileName ) const;
-
-      void load( const String& fileName );
-
-      void print( std::ostream& str ) const;
-
-      const IndexerType& getIndexer() const;
-
-      IndexerType& getIndexer();
-
-      __cuda_callable__
-      IndexType getPaddingIndex() const;
-
-   protected:
-
-      __cuda_callable__
-      IndexType getElementIndex( const IndexType row,
-                                 const IndexType localIdx ) const;
-
-      DiagonalsShiftsType diagonalsShifts;
-
-      HostDiagonalsShiftsType hostDiagonalsShifts;
-
-      IndexerType indexer;
-
-      ViewType view;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/Multidiagonal.hpp>
diff --git a/src/TNL/Matrices/MultidiagonalMatrix.h b/src/TNL/Matrices/MultidiagonalMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..74cd7a4fe3748180c8f2134dfae145f09910ff11
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrix.h
@@ -0,0 +1,882 @@
+/***************************************************************************
+                          MultidiagonalMatrix.h  -  description
+                             -------------------
+    begin                : Oct 13, 2011
+    copyright            : (C) 2011 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/MultidiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h>
+#include <TNL/Matrices/MultidiagonalMatrixView.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief Implementation of sparse multidiagonal matrix.
+ *
+ * Use this matrix type for storing of matrices where the offsets of non-zero elements
+ * from the diagonal are the same in each row. Typically such matrices arise from
+ * discretization of partial differential equations on regular numerical grids. This is
+ * one example (dots represent zero matrix elements):
+ *
+ * \f[
+ * \left(
+ * \begin{array}{ccccccc}
+ *  4  & -1  &  .  & -1  &  . & .  \\
+ * -1  &  4  & -1  &  .  & -1 & .  \\
+ *  .  & -1  &  4  & -1  &  . & -1 \\
+ * -1  & .   & -1  &  4  & -1 &  . \\
+ *  .  & -1  &  .  & -1  &  4 & -1 \\
+ *  .  &  .  & -1  &  .  & -1 &  4
+ * \end{array}
+ * \right)
+ * \f]
+ *
+ * In this matrix, the column indexes in each row \f$i\f$ can be expressed as
+ * \f$\{i-3, i-1, i, i+1, i+3\}\f$ (where the resulting index is non-negative and
+ *  smaller than the number of matrix columns). Therefore the diagonals offsets
+ * are \f$\{-3,-1,0,1,3\}\f$. Advantage is that we do not store the column indexes
+ * explicitly as it is in \ref SparseMatrix. This can reduce significantly the
+ * memory requirements which also means better performance. See the following table
+ * for the storage requirements comparison between \ref MultidiagonalMatrix and \ref SparseMatrix.
+ *
+ *  Data types         |      SparseMatrix    | MultidiagonalMatrix | Ratio
+ * --------------------|----------------------|---------------------|--------
+ *  float + 32-bit int | 8 bytes per element  | 4 bytes per element | 50%
+ *  double + 32-bit int| 12 bytes per element | 8 bytes per element | 67%
+ *  float + 64-bit int | 12 bytes per element | 4 bytes per element | 33%
+ *  double + 64-bit int| 16 bytes per element | 8 bytes per element | 50%
+ *
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder
+ *         or ColumnMajorOrder.
+ * \tparam RealAllocator is allocator for the matrix elements.
+ * \tparam IndexAllocator is allocator for the matrix elements offsets.
+ */
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(),
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
+{
+   public:
+
+      // Supporting types - they are not important for the user
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using IndexerType = details::MultidiagonalMatrixIndexer< Index, Organization >;
+      using DiagonalsOffsetsType = Containers::Vector< Index, Device, Index, IndexAllocator >;
+      using DiagonalsOffsetsView = typename DiagonalsOffsetsType::ViewType;
+      using HostDiagonalsOffsetsType = Containers::Vector< Index, Devices::Host, Index >;
+      using HostDiagonalsOffsetsView = typename HostDiagonalsOffsetsType::ViewType;
+
+      /**
+       * \brief The type of matrix elements.
+       */
+      using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
+      using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief The allocator for matrix elements values.
+       */
+      using RealAllocatorType = RealAllocator;
+
+      /**
+       * \brief The allocator for matrix elements offsets from the diagonal.
+       */
+      using IndexAllocatorType = IndexAllocator;
+
+      /**
+       * \brief Type of related matrix view.
+       *
+       * See \ref MultidiagonalMatrixView.
+       */
+      using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       *
+       * See \ref MultidiagonalMatrixView.
+       */
+      using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
+
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >;
+
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
+      using ConstRowView = typename RowView::ConstViewType;
+
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                ElementsOrganization _Organization = Organization,
+                typename _RealAllocator = RealAllocator,
+                typename _IndexAllocator = IndexAllocator >
+      using Self = MultidiagonalMatrix< _Real, _Device, _Index, _Organization, _RealAllocator, _IndexAllocator >;
+
+      /**
+       * \brief Elements organization getter.
+       */
+      static constexpr ElementsOrganization getOrganization() { return Organization; };
+
+      /**
+       * \brief Constructor with no parameters.
+       */
+      MultidiagonalMatrix();
+
+      /**
+       * \brief Constructor with matrix dimensions.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       */
+      MultidiagonalMatrix( const IndexType rows,
+                           const IndexType columns );
+
+      /**
+       * \brief Constructor with matrix dimensions and matrix elements offsets.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param diagonalsOffsets are offsets of subdiagonals from the main diagonal.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_Constructor.out
+       */
+      template< typename Vector >
+      MultidiagonalMatrix( const IndexType rows,
+                           const IndexType columns,
+                           const Vector& diagonalsOffsets );
+
+      /**
+       * \brief Constructor with matrix dimensions and diagonals offsets.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param diagonalsOffsets are offsets of sub-diagonals from the main diagonal.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_Constructor_init_list_1.out
+       */
+      template< typename ListIndex >
+      MultidiagonalMatrix( const IndexType rows,
+                           const IndexType columns,
+                           const std::initializer_list< ListIndex > diagonalsOffsets );
+
+      /**
+       * \brief Constructor with matrix dimensions, diagonals offsets and matrix elements.
+       *
+       * The number of matrix rows is deduced from the size of the initializer list \e data.
+       *
+       * \tparam ListIndex is type used in the initializer list defining matrix diagonals offsets.
+       * \tparam ListReal is type used in the initializer list defining matrix elements values.
+       *
+       * \param columns is number of matrix columns.
+       * \param diagonalsOffsets are offsets of sub-diagonals from the main diagonal.
+       * \param data is initializer list holding matrix elements. The size of the outer list
+       *    defines the number of matrix rows. Each inner list defines values of each sub-diagonal
+       *    and so its size should be lower or equal to the size of \e diagonalsOffsets. Values
+       *    of sub-diagonals which do not fit to given row are omitted.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_Constructor_init_list_2.out
+       */
+      template< typename ListIndex, typename ListReal >
+      MultidiagonalMatrix( const IndexType columns,
+                           const std::initializer_list< ListIndex > diagonalsOffsets,
+                           const std::initializer_list< std::initializer_list< ListReal > >& data );
+
+      /**
+       * \brief Copy constructor.
+       *
+       * \param matrix is an input matrix.
+       */
+      MultidiagonalMatrix( const MultidiagonalMatrix& matrix ) = default;
+
+      /**
+       * \brief Move constructor.
+       *
+       * \param matrix is an input matrix.
+       */
+      MultidiagonalMatrix( MultidiagonalMatrix&& matrix ) = default;
+
+      /**
+       * \brief Returns a modifiable view of the multidiagonal matrix.
+       *
+       * See \ref MultidiagonalMatrixView.
+       *
+       * \return multidiagonal matrix view.
+       */
+      ViewType getView() const; // TODO: remove const
+
+      /**
+       * \brief Returns a non-modifiable view of the multidiagonal matrix.
+       *
+       * See \ref MultidiagonalMatrixView.
+       *
+       * \return multidiagonal matrix view.
+       */
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Returns string with serialization type.
+       *
+       * The string has a form `Matrices::MultidiagonalMatrix< RealType,  [any_device], IndexType, ElementsOrganization, [any_allocator], [any_allocator] >`.
+       *
+       * \return \ref String with the serialization type.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getSerializationType.out
+       */
+      static String getSerializationType();
+
+      /**
+       * \brief Returns string with serialization type.
+       *
+       * See \ref MultidiagonalMatrix::getSerializationType.
+       *
+       * \return \e String with the serialization type.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getSerializationType.out
+       */
+      virtual String getSerializationTypeVirtual() const;
+
+      /**
+       * \brief Set matrix dimensions and diagonals offsets.
+       *
+       * \tparam Vector is type of vector like container holding the diagonals offsets.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param diagonalsOffsets is vector with diagonals offsets.
+       */
+      template< typename Vector >
+      void setDimensions( const IndexType rows,
+                          const IndexType columns,
+                          const Vector& diagonalsOffsets );
+
+      /**
+       * \brief This method is for compatibility with \ref SparseMatrix.
+       *
+       * It checks if the number of matrix diagonals is compatible with
+       * the required number of non-zero matrix elements in each row. If not,
+       * an exception is thrown.
+       *
+       * \tparam RowCapacitiesVector is vector-like container type for holding required
+       *    row capacities.
+       *
+       * \param rowCapacities is vector-like container holding required row capacities.
+       */
+      template< typename RowCapacitiesVector >
+      void setRowCapacities( const RowCapacitiesVector& rowCapacities );
+
+      /**
+       * \brief Set matrix elements from an initializer list.
+       *
+       * \tparam ListReal is data type of the initializer list.
+       *
+       * \param data is initializer list holding matrix elements. The size of the outer list
+       *    defines the number of matrix rows. Each inner list defines values of each sub-diagonal
+       *    and so its size should be lower or equal to the size of \e diagonalsOffsets. Values
+       *    of sub-diagonals which do not fit to given row are omitted.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_setElements.out
+       */
+      template< typename ListReal >
+      void setElements( const std::initializer_list< std::initializer_list< ListReal > >& data );
+
+      /**
+       * \brief Returns number of diagonals.
+       *
+       * \return Number of diagonals.
+       */
+      const IndexType& getDiagonalsCount() const;
+
+      /**
+       * \brief Returns vector with diagonals offsets.
+       *
+       * \return vector with diagonals offsets.
+       */
+      const DiagonalsOffsetsType& getDiagonalsOffsets() const;
+
+      /**
+       * \brief Computes number of non-zeros in each row.
+       *
+       * \tparam Vector is the type of \e rowLengths.
+       * \param rowLengths is a vector into which the number of non-zeros in each row will be stored.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getCompressedRowLengths.out
+       */
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
+
+      /**
+       * \brief Setup the matrix dimensions and diagonals offsets based on another multidiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix.
+       *
+       * \param matrix is the source matrix.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      void setLike( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix );
+
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       *
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for matrix having all allocated elements set to zero.
+       *
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Resets the matrix to zero dimensions.
+       */
+      void reset();
+
+      /**
+       * \brief Comparison operator with another multidiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix.
+       * \param matrix is the source matrix, i.e. the matrix this one is compared with.
+       * \return \e true if both matrices are identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      bool operator == ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const;
+
+      /**
+       * \brief Comparison operator with another multidiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix.
+       *
+       * \param matrix is the source matrix.
+       *
+       * \return \e true if both matrices are NOT identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      bool operator != ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const;
+
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       *
+       * \param rowIdx is matrix row index.
+       *
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getRow.out
+       *
+       * See \ref MultidiagonalMatrixRowView.
+       */
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       *
+       * \param rowIdx is matrix row index.
+       *
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getConstRow.out
+       *
+       * See \ref MultidiagonalMatrixRowView.
+       */
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      /**
+       * \brief Set all matrix elements to given value.
+       *
+       * \param value is the new value of all matrix elements.
+       */
+      void setValue( const RealType& value );
+
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       *
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_setElement.out
+       */
+      __cuda_callable__
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       *
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_addElement.out
+       *
+       */
+      __cuda_callable__
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
+
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       *
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       *
+       * \return value of given matrix element.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_getElement.out
+       *
+       */
+      __cuda_callable__
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero, i.e. the type returned by the Fetch lambda function (denoted FetchValue above).
+       *
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero, i.e. the type returned by the Fetch lambda function (denoted FetchValue above).
+       *
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero, i.e. the type returned by the Fetch lambda function (denoted FetchValue above).
+       *
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows for constant instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero, i.e. the type returned by the Fetch lambda function (denoted FetchValue above).
+       *
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for iteration over matrix rows for constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have the form
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
+
+      /**
+       * \brief Method for iteration over matrix rows for non-constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have the form
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function );
+
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       *
+       * See \ref MultidiagonalMatrix::forRows.
+       *
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       *
+       * See \ref MultidiagonalMatrix::forRows.
+       *
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      /**
+       * \brief Computes product of matrix and vector.
+       *
+       * More precisely, it computes:
+       *
+       * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector`
+       *
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       *
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product is computed. It is the number of
+       *    the matrix rows by default (the declared default is 0, presumably meaning "all rows" - TODO confirm).
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0,
+                          const IndexType begin = 0,
+                          IndexType end = 0 ) const;
+
+      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
+      void addMatrix( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const MultidiagonalMatrix< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       *
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
+       */
+      MultidiagonalMatrix& operator=( const MultidiagonalMatrix& matrix );
+
+      /**
+       * \brief Assignment of another multidiagonal matrix
+       *
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_,
+                typename IndexAllocator_ >
+      MultidiagonalMatrix& operator=( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix );
+
+      /**
+       * \brief Method for saving the matrix to a file.
+       *
+       * \param file is the output file.
+       */
+      void save( File& file ) const;
+
+      /**
+       * \brief Method for loading the matrix from a file.
+       *
+       * \param file is the input file.
+       */
+      void load( File& file );
+
+      /**
+       * \brief Method for saving the matrix to the file with given filename.
+       *
+       * \param fileName is name of the file.
+       */
+      void save( const String& fileName ) const;
+
+      /**
+       * \brief Method for loading the matrix from the file with given filename.
+       *
+       * \param fileName is name of the file.
+       */
+      void load( const String& fileName );
+
+      /**
+       * \brief Method for printing the matrix to output stream.
+       *
+       * \param str is the output stream.
+       */
+      void print( std::ostream& str ) const;
+
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       *
+       * \return constant reference to the indexer.
+       */
+      const IndexerType& getIndexer() const;
+
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       *
+       * \return non-constant reference to the indexer.
+       */
+      IndexerType& getIndexer();
+
+      /**
+       * \brief Returns padding index denoting padding zero elements.
+       *
+       * These elements are used for efficient data alignment in memory.
+       *
+       * \return value of the padding index.
+       */
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      DiagonalsOffsetsType diagonalsOffsets;
+
+      HostDiagonalsOffsetsType hostDiagonalsOffsets;
+
+      IndexerType indexer;
+
+      ViewType view;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MultidiagonalMatrix.hpp>
diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/MultidiagonalMatrix.hpp
similarity index 63%
rename from src/TNL/Matrices/Multidiagonal.hpp
rename to src/TNL/Matrices/MultidiagonalMatrix.hpp
index be6ec58c50e4c967a7ec05b466fc8ec44f296fe0..e50782d9a6dcb7bb311aa497e805481eec5171ed 100644
--- a/src/TNL/Matrices/Multidiagonal.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrix.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Multidiagonal.hpp  -  description
+                          MultidiagonalMatrix.hpp  -  description
                              -------------------
     begin                : Oct 13, 2011
     copyright            : (C) 2011 by Tomas Oberhuber
@@ -12,14 +12,14 @@
 
 #include <sstream>
 #include <TNL/Assert.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
 namespace Matrices {
 
 template< typename Device >
-class MultidiagonalDeviceDependentCode;
+class MultidiagonalMatrixDeviceDependentCode;
 
 template< typename Real,
           typename Device,
@@ -27,8 +27,8 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-Multidiagonal()
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix()
 {
 }
 
@@ -39,13 +39,13 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Vector >
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-Multidiagonal( const IndexType rows,
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix( const IndexType rows,
                const IndexType columns,
-               const Vector& diagonalsShifts )
+               const Vector& diagonalsOffsets )
 {
-   TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." );
-   this->setDimensions( rows, columns, diagonalsShifts );
+   TNL_ASSERT_GT( diagonalsOffsets.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." );
+   this->setDimensions( rows, columns, diagonalsOffsets );
 }
 
 template< typename Real,
@@ -54,29 +54,51 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getView() const -> ViewType
+      template< typename ListIndex >
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix( const IndexType rows,
+                     const IndexType columns,
+                     const std::initializer_list< ListIndex > diagonalsOffsets )
 {
-   // TODO: fix when getConstView works
-   return ViewType( const_cast< Multidiagonal* >( this )->values.getView(),
-                    const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(),
-                    const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(),
-                    indexer );
+   Containers::Vector< IndexType, DeviceType, IndexType > shifts( diagonalsOffsets );
+   TNL_ASSERT_GT( shifts.getSize(), 0, "Cannot construct multidiagonal matrix with no diagonals shifts." );
+   this->setDimensions( rows, columns, shifts );
 }
 
-/*template< typename Real,
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename ListIndex, typename ListReal >
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix( const IndexType columns,
+                     const std::initializer_list< ListIndex > diagonalsOffsets,
+                     const std::initializer_list< std::initializer_list< ListReal > >& data )
+{
+   Containers::Vector< IndexType, DeviceType, IndexType > shifts( diagonalsOffsets );
+   TNL_ASSERT_GT( shifts.getSize(), 0, "Cannot construct multidiagonal matrix with no diagonals shifts." );
+   this->setDimensions( data.size(), columns, shifts );
+   this->setElements( data );
+}
+
+template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
 auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getConstView() const -> ConstViewType
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getView() const -> ViewType
 {
-   return ConstViewType( this->values.getConstView(), indexer );
-}*/
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< MultidiagonalMatrix* >( this )->values.getView(),
+                    const_cast< MultidiagonalMatrix* >( this )->diagonalsOffsets.getView(),
+                    const_cast< MultidiagonalMatrix* >( this )->hostDiagonalsOffsets.getView(),
+                    indexer );
+}
 
 template< typename Real,
           typename Device,
@@ -85,13 +107,10 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 String
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getSerializationType()
 {
-   return String( "Matrices::Multidiagonal< " ) +
-          TNL::getSerializationType< RealType >() + ", [any_device], " +
-          TNL::getSerializationType< IndexType >() + ", " +
-          ( Organization ? "true" : "false" ) + ", [any_allocator], [any_allocator] >";
+   return ViewType::getSerializationType();
 }
 
 template< typename Real,
@@ -101,7 +120,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 String
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
@@ -115,19 +134,19 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Vector >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 setDimensions( const IndexType rows,
                const IndexType columns,
-               const Vector& diagonalsShifts )
+               const Vector& diagonalsOffsets )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   this->diagonalsShifts = diagonalsShifts;
-   this->hostDiagonalsShifts = diagonalsShifts;
-   const IndexType minShift = min( diagonalsShifts );
+   this->diagonalsOffsets = diagonalsOffsets;
+   this->hostDiagonalsOffsets = diagonalsOffsets;
+   const IndexType minOffset = min( diagonalsOffsets );
    IndexType nonemptyRows = min( rows, columns );
-   if( rows > columns && minShift < 0 )
-      nonemptyRows = min( rows, nonemptyRows - minShift );
-   this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows );
+   if( rows > columns && minOffset < 0 )
+      nonemptyRows = min( rows, nonemptyRows - minOffset );
+   this->indexer.set( rows, columns, diagonalsOffsets.getSize(), nonemptyRows );
    this->values.setSize( this->indexer.getStorageSize() );
    this->values = 0.0;
    this->view = this->getView();
@@ -139,10 +158,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
- //  template< typename Vector >
+  template< typename RowCapacitiesVector >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+setRowCapacities( const RowCapacitiesVector& rowLengths )
 {
    if( max( rowLengths ) > 3 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
@@ -166,11 +185,33 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-const Index&
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getDiagonalsCount() const
+   template< typename ListReal >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+setElements( const std::initializer_list< std::initializer_list< ListReal > >& data )
 {
-   return this->view.getDiagonalsCount();
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      this->getValues() = 0.0;
+      auto row_it = data.begin();
+      for( size_t rowIdx = 0; rowIdx < data.size(); rowIdx++ )
+      {
+         auto data_it = row_it->begin();
+         IndexType i = 0;
+         while( data_it != row_it->end() )
+            this->getRow( rowIdx ).setElement( i++, *data_it++ );
+         row_it ++;
+      }
+   }
+   else
+   {
+      MultidiagonalMatrix< Real, Devices::Host, Index, Organization > hostMatrix(
+         this->getRows(),
+         this->getColumns(),
+         this->getDiagonalsOffsets() );
+      hostMatrix.setElements( data );
+      *this = hostMatrix;
+   }
 }
 
 template< typename Real,
@@ -179,11 +220,11 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getDiagonalsShifts() const -> const DiagonalsShiftsType&
+const Index&
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getDiagonalsCount() const
 {
-   return this->diagonalsShifts;
+   return this->view.getDiagonalsCount();
 }
 
 template< typename Real,
@@ -192,12 +233,11 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Vector >
-void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getCompressedRowLengths( Vector& rowLengths ) const
+auto
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getDiagonalsOffsets() const -> const DiagonalsOffsetsType&
 {
-   return this->view.getCompressedRowLengths( rowLengths );
+   return this->diagonalsOffsets;
 }
 
 template< typename Real,
@@ -206,11 +246,12 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-Index
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getNonemptyRowsCount() const
+   template< typename Vector >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
 {
-   return this->indexer.getNonemptyRowsCount();
+   return this->view.getCompressedRowLengths( rowLengths );
 }
 
 template< typename Real,
@@ -220,7 +261,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 Index
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getRowLength( const IndexType row ) const
 {
    return this->view.getRowLength( row );
@@ -232,25 +273,12 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-Index
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getMaxRowLength() const
-{
-   return this->view.getMaxRowLength();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          typename RealAllocator,
-          typename IndexAllocator >
-   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
+   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+setLike( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix )
 {
-   this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() );
+   this->setDimensions( matrix.getRows(), matrix.getColumns(), matrix.getDiagonalsOffsets() );
 }
 
 template< typename Real,
@@ -260,10 +288,10 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 Index
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getNumberOfNonzeroMatrixElements() const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getNonzeroElementsCount() const
 {
-   return this->view.getNumberOfNonzeroMatrixElements();
+   return this->view.getNonzeroElementsCount();
 }
 
 template< typename Real,
@@ -273,7 +301,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 reset()
 {
    Matrix< Real, Device, Index >::reset();
@@ -285,16 +313,16 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
+   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ >
 bool
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+operator == ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const
 {
    if( Organization == Organization_ )
       return this->values == matrix.values;
    else
    {
-      TNL_ASSERT( false, "TODO" );
+      TNL_ASSERT_TRUE( false, "TODO" );
    }
 }
 
@@ -304,10 +332,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
+   template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ >
 bool
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+operator != ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const
 {
    return ! this->operator==( matrix );
 }
@@ -319,7 +347,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 setValue( const RealType& v )
 {
    this->view.setValue( v );
@@ -333,7 +361,7 @@ template< typename Real,
           typename IndexAllocator >
 __cuda_callable__
 auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
    return this->view.getRow( rowIdx );
@@ -347,7 +375,7 @@ template< typename Real,
           typename IndexAllocator >
 __cuda_callable__
 auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    return this->view.getRow( rowIdx );
@@ -359,8 +387,9 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
+__cuda_callable__
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
    this->view.setElement( row, column, value );
@@ -372,8 +401,9 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
+__cuda_callable__
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
@@ -388,8 +418,9 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
+__cuda_callable__
 Real
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
    return this->view.getElement( row, column );
@@ -403,7 +434,7 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
@@ -417,7 +448,21 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
@@ -429,12 +474,12 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Function >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forRows( IndexType first, IndexType last, Function& function ) const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->view.forRows( first, last, function );
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -443,10 +488,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-  template< typename Function >
+   template< typename Function >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forRows( IndexType first, IndexType last, Function& function )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
 {
    this->view.forRows( first, last, function );
 }
@@ -457,12 +502,12 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-   template< typename Function >
+  template< typename Function >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forAllRows( Function& function ) const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
 {
-   this->view.forRows( 0, this->getRows(), function );
+   this->view.forRows( first, last, function );
 }
 
 template< typename Real,
@@ -473,8 +518,8 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Function >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forAllRows( Function& function )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
 {
    this->view.forRows( 0, this->getRows(), function );
 }
@@ -485,13 +530,12 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-template< typename Vector >
-__cuda_callable__
-typename Vector::RealType
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-rowVectorProduct( const IndexType row, const Vector& vector ) const
+   template< typename Function >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
 {
-   return this->view.rowVectorProduct();
+   this->view.forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
@@ -503,10 +547,16 @@ template< typename Real,
    template< typename InVector,
              typename OutVector >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-vectorProduct( const InVector& inVector, OutVector& outVector ) const
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator,
+               const IndexType firstRow,
+               IndexType lastRow ) const
 {
-   this->view.vectorProduct( inVector, outVector );
+   this->view.vectorProduct( inVector, outVector, matrixMultiplicator,
+                              outVectorMultiplicator, firstRow, lastRow );
 }
 
 template< typename Real,
@@ -517,8 +567,8 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-addMatrix( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+addMatrix( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
@@ -530,8 +580,8 @@ template< typename Real,
           typename Real2,
           typename Index,
           typename Index2 >
-__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
-                                                             Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+__global__ void MultidiagonalMatrixTranspositionCudaKernel( const MultidiagonalMatrix< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             MultidiagonalMatrix< Real, Devices::Cuda, Index >* outMatrix,
                                                              const Real matrixMultiplicator,
                                                              const Index gridIdx )
 {
@@ -560,8 +610,9 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Real2, typename Index2 >
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix,
-                                                                    const RealType& matrixMultiplicator )
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+getTransposition( const MultidiagonalMatrix< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
                std::cerr << "This matrix rows: " << this->getRows() << std::endl
@@ -580,8 +631,8 @@ void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAlloc
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      Multidiagonal* kernel_this = Cuda::passToDevice( *this );
-      typedef  Multidiagonal< Real2, Device, Index2 > InMatrixType;
+      MultidiagonalMatrix* kernel_this = Cuda::passToDevice( *this );
+      typedef  MultidiagonalMatrix< Real2, Device, Index2 > InMatrixType;
       InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
       dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
       const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
@@ -590,7 +641,7 @@ void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAlloc
       {
          if( gridIdx == cudaGrids - 1 )
             cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-         MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+         MultidiagonalMatrixTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                                     ( kernel_inMatrix,
                                                       kernel_this,
                                                       matrixMultiplicator,
@@ -611,10 +662,11 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b,
-                                                              const IndexType row,
-                                                              Vector2& x,
-                                                              const RealType& omega ) const
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
 {
    RealType sum( 0.0 );
    if( row > 0 )
@@ -632,8 +684,8 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >&
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >&
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::operator=( const MultidiagonalMatrix& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -648,11 +700,11 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ >
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >&
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix )
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >&
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+operator=( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix )
 {
-   using RHSMatrix = Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >;
+   using RHSMatrix = MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >;
    using RHSIndexType = typename RHSMatrix::IndexType;
    using RHSRealType = typename RHSMatrix::RealType;
    using RHSDeviceType = typename RHSMatrix::DeviceType;
@@ -674,7 +726,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAlloc
       }
       else
       {
-         const IndexType maxRowLength = this->diagonalsShifts.getSize();
+         const IndexType maxRowLength = this->diagonalsOffsets.getSize();
          const IndexType bufferRowsCount( 128 );
          const size_t bufferSize = bufferRowsCount * maxRowLength;
          Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
@@ -722,10 +774,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( File& file ) const
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
-   file << diagonalsShifts;
+   file << diagonalsOffsets;
 }
 
 template< typename Real,
@@ -734,16 +786,16 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( File& file )
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
-   file >> this->diagonalsShifts;
-   this->hostDiagonalsShifts = this->diagonalsShifts;
-   const IndexType minShift = min( diagonalsShifts );
+   file >> this->diagonalsOffsets;
+   this->hostDiagonalsOffsets = this->diagonalsOffsets;
+   const IndexType minOffset = min( diagonalsOffsets );
    IndexType nonemptyRows = min( this->getRows(), this->getColumns() );
-   if( this->getRows() > this->getColumns() && minShift < 0 )
-      nonemptyRows = min( this->getRows(), nonemptyRows - minShift );
-   this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows );
+   if( this->getRows() > this->getColumns() && minOffset < 0 )
+      nonemptyRows = min( this->getRows(), nonemptyRows - minOffset );
+   this->indexer.set( this->getRows(), this->getColumns(), diagonalsOffsets.getSize(), nonemptyRows );
    this->view = this->getView();
 }
 
@@ -753,7 +805,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( const String& fileName ) const
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
@@ -764,7 +816,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator,
           typename IndexAllocator >
-void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( const String& fileName )
+void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( const String& fileName )
 {
    Object::load( fileName );
 }
@@ -776,7 +828,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 void
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 print( std::ostream& str ) const
 {
    this->view.print( str );
@@ -789,7 +841,7 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getIndexer() const -> const IndexerType&
 {
    return this->indexer;
@@ -802,32 +854,12 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 auto
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getIndexer() -> IndexerType&
 {
    return this->indexer;
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          typename RealAllocator,
-          typename IndexAllocator >
-__cuda_callable__
-Index Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getElementIndex( const IndexType row, const IndexType column ) const
-{
-   IndexType localIdx = column - row;
-   if( row > 0 )
-      localIdx++;
-
-   TNL_ASSERT_GE( localIdx, 0, "" );
-   TNL_ASSERT_LT( localIdx, 3, "" );
-
-   return this->indexer.getGlobalIndex( row, localIdx );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -836,115 +868,11 @@ template< typename Real,
           typename IndexAllocator >
 __cuda_callable__
 Index
-Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
 getPaddingIndex() const
 {
    return this->view.getPaddingIndex();
 }
 
-/*
-template<>
-class MultidiagonalDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return 2*row + column;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType  >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ 1 ];
-         Index i = 3 * row;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i - 1 ] +
-                   vector[ row ] * values[ i ];
-         return vector[ row - 1 ] * values[ i - 1 ] +
-                vector[ row ] * values[ i ] +
-                vector[ row + 1 ] * values[ i + 1 ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class MultidiagonalDeviceDependentCode< Devices::Cuda >
-{
-   public:
-
-      typedef Devices::Cuda Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return ( column - row + 1 )*rows + row - 1;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ rows - 1 ];
-         Index i = row - 1;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i ] +
-                   vector[ row ] * values[ i + rows ];
-         return vector[ row - 1 ] * values[ i ] +
-                vector[ row ] * values[ i + rows ] +
-                vector[ row + 1 ] * values[ i + 2*rows ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
- */
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
index 0825d6fb365ebd6552ee033d41a1fe208219a14e..181974b72cb491174c0a51a157ddc35c952c2da3 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
@@ -13,37 +13,139 @@
 namespace TNL {
 namespace Matrices {   
 
+/**
+ * \brief RowView is a simple structure for accessing rows of multidiagonal matrix.
+ * 
+ * \tparam ValuesView is a vector view storing the matrix elements values.
+ * \tparam Indexer is type of object responsible for indexing and organization of
+ *    matrix elements.
+ * \tparam DiagonalsOffsetsView_ is a container view holding offsets of
+ *    diagonals of multidiagonal matrix.
+ * 
+ * See \ref MultidiagonalMatrix and \ref MultidiagonalMatrixView.
+ * 
+ * \par Example
+ * \include Matrices/MultidiagonalMatrixExample_getRow.cpp
+ * \par Output
+ * \include MultidiagonalMatrixExample_getRow.out
+ * 
+ * \par Example
+ * \include Matrices/MultidiagonalMatrixViewExample_getRow.cpp
+ * \par Output
+ * \include MultidiagonalMatrixViewExample_getRow.out
+ */
 template< typename ValuesView,
           typename Indexer,
-          typename DiagonalsShiftsView_ >
+          typename DiagonalsOffsetsView_ >
 class MultidiagonalMatrixRowView
 {
    public:
 
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = typename ValuesView::RealType;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
       using IndexType = typename ValuesView::IndexType;
+
+      /**
+       * \brief Type of container view used for storing the matrix elements values.
+       */
       using ValuesViewType = ValuesView;
+
+      /**
+       * \brief Type of object responsible for indexing and organization of
+       * matrix elements.
+       */
       using IndexerType = Indexer;
-      using DiagonalsShiftsView = DiagonalsShiftsView_;
 
+      /**
+       * \brief Type of a container view holding offsets of
+       * diagonals of multidiagonal matrix.
+       */
+      using DiagonalsOffsetsView = DiagonalsOffsetsView_;
+
+      /**
+       * \brief Type of constant container view used for storing the matrix elements values.
+       */
+      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+
+      /**
+       * \brief Type of constant container view holding offsets of diagonals of multidiagonal matrix.
+       */
+      using ConstDiagonalsOffsetsViewType = typename DiagonalsOffsetsView::ConstViewType;
+
+      /**
+       * \brief Type of constant indexer view.
+       */
+      using ConstIndexerViewType = typename Indexer::ConstType;
+
+      /**
+       * \brief Type of constant multidiagonal matrix row view.
+       */
+      using ConstViewType = MultidiagonalMatrixRowView< ConstValuesViewType, ConstIndexerViewType, ConstDiagonalsOffsetsViewType >;
+
+      /**
+       * \brief Constructor with all necessary data.
+       * 
+       * \param rowIdx is index of the matrix row this RowView refers to.
+       * \param diagonalsOffsets is a vector view holding offsets of matrix diagonals.
+       * \param values is a vector view holding values of matrix elements.
+       * \param indexer is object responsible for indexing and organization of matrix elements.
+       */
       __cuda_callable__
       MultidiagonalMatrixRowView( const IndexType rowIdx,
-                                  const DiagonalsShiftsView& diagonalsShifts,
+                                  const DiagonalsOffsetsView& diagonalsOffsets,
                                   const ValuesViewType& values,
-                                  const IndexerType& indexer);
+                                  const IndexerType& indexer );
 
+      /**
+       * \brief Returns number of diagonals of the multidiagonal matrix.
+       * 
+       * \return number of diagonals of the multidiagonal matrix.
+       */
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Computes column index of matrix element on given subdiagonal.
+       * 
+       * \param localIdx is an index of the subdiagonal.
+       * 
+       * \return column index of matrix element on given subdiagonal.
+       */
       __cuda_callable__
       const IndexType getColumnIndex( const IndexType localIdx ) const;
 
+      /**
+       * \brief Returns value of matrix element on given subdiagonal.
+       * 
+       * \param localIdx is an index of the subdiagonal.
+       * 
+       * \return constant reference to matrix element value.
+       */
       __cuda_callable__
       const RealType& getValue( const IndexType localIdx ) const;
 
+      /**
+       * \brief Returns value of matrix element on given subdiagonal.
+       * 
+       * \param localIdx is an index of the subdiagonal.
+       * 
+       * \return non-constant reference to matrix element value.
+       */
       __cuda_callable__
       RealType& getValue( const IndexType localIdx );
 
+      /**
+       * \brief Changes value of matrix element on given subdiagonal.
+       * 
+       * \param localIdx is an index of the matrix subdiagonal.
+       * \param value is the new value of the matrix element.
+       */
       __cuda_callable__
       void setElement( const IndexType localIdx,
                        const RealType& value );
@@ -51,7 +153,7 @@ class MultidiagonalMatrixRowView
 
       IndexType rowIdx;
 
-      DiagonalsShiftsView diagonalsShifts;
+      DiagonalsOffsetsView diagonalsOffsets;
 
       ValuesViewType values;
 
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
index 855b8463aa13eb5d21bee65923704d2be1d897ba..37cdd455a9838ac5e67ded80009a44bf4dd05796 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -13,59 +13,59 @@
 namespace TNL {
 namespace Matrices {   
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 MultidiagonalMatrixRowView( const IndexType rowIdx,
-                            const DiagonalsShiftsView& diagonalsShifts,
+                            const DiagonalsOffsetsView& diagonalsOffsets,
                             const ValuesViewType& values,
                             const IndexerType& indexer )
-: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer )
+: rowIdx( rowIdx ), diagonalsOffsets( diagonalsOffsets ), values( values ), indexer( indexer )
 {
 }
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 getSize() const -> IndexType
 {
-   return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx );
+   return diagonalsOffsets.getSize();//indexer.getRowSize( rowIdx );
 }
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 getColumnIndex( const IndexType localIdx ) const -> const IndexType
 {
    TNL_ASSERT_GE( localIdx, 0, "" );
    TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" );
-   return rowIdx + diagonalsShifts[ localIdx ];
+   return rowIdx + diagonalsOffsets[ localIdx ];
 }
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 getValue( const IndexType localIdx ) const -> const RealType&
 {
    return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
 }
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 auto
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 getValue( const IndexType localIdx ) -> RealType&
 {
    return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
 }
 
-template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView >
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 void 
-MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >::
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 setElement( const IndexType localIdx,
             const RealType& value )
 {
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index a3ebfe375d818a363acb6da1be90393dde63b672..2319570252d1f19167e39d749527036ee0cf6e91 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -19,6 +19,20 @@
 namespace TNL {
 namespace Matrices {
 
+/**
+ * \brief Implementation of sparse multidiagonal matrix view.
+ *
+ * It serves as an accessor to \ref MultidiagonalMatrix for example when passing the
+ * matrix to lambda functions. MultidiagonalMatrix view can be also created in CUDA kernels.
+ *
+ * See \ref MultidiagonalMatrix for more details.
+ * 
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder
+ *         or ColumnMajorOrder.
+ */
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -26,116 +40,522 @@ template< typename Real = double,
 class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
 {
    public:
+
+      // Supporting types - they are not important for the user
+      using BaseType = MatrixView< Real, Device, Index >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using IndexerType = details::MultidiagonalMatrixIndexer< Index, Organization >;
+      using DiagonalsOffsetsView = Containers::VectorView< Index, Device, Index >;
+      using HostDiagonalsOffsetsView = Containers::VectorView< Index, Devices::Host, Index >;
+
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
       using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
       using IndexType = Index;
-      using BaseType = MatrixView< Real, Device, Index >;
-      //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >;
-      using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >;
-      //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >;
-      using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >;
-      using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >;
-      using ValuesViewType = typename BaseType::ValuesView;
+
+      /**
+       * \brief Type of related matrix view. 
+       */
       using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       */
       using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
-      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >;
 
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >;
 
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
       template< typename _Real = Real,
                 typename _Device = Device,
                 typename _Index = Index,
                 ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() >
       using Self = MultidiagonalMatrixView< _Real, _Device, _Index, Organization_ >;
 
+      /**
+       * \brief Constructor with no parameters.
+       */
+      __cuda_callable__
       MultidiagonalMatrixView();
 
+      /**
+       * \brief Constructor with all necessary data and views.
+       * 
+       * \param values is a vector view with matrix elements values
+       * \param diagonalsOffsets is a vector view with diagonals offsets
+       * \param hostDiagonalsOffsets is a vector view with a copy of diagonals offsets on the host
+       * \param indexer is an indexer of matrix elements
+       */
+      __cuda_callable__
       MultidiagonalMatrixView( const ValuesViewType& values,
-                               const DiagonalsShiftsView& diagonalsShifts,
-                               const HostDiagonalsShiftsView& hostDiagonalsShifts,
+                               const DiagonalsOffsetsView& diagonalsOffsets,
+                               const HostDiagonalsOffsetsView& hostDiagonalsOffsets,
                                const IndexerType& indexer );
 
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param view is an input multidiagonal matrix view.
+       */
+      __cuda_callable__
+      MultidiagonalMatrixView( const MultidiagonalMatrixView& view ) = default;
+
+      /**
+       * \brief Move constructor.
+       * 
+       * \param view is an input multidiagonal matrix view.
+       */
+      __cuda_callable__
+      MultidiagonalMatrixView( MultidiagonalMatrixView&& view ) = default;
+
+      /**
+       * \brief Returns a modifiable view of the multidiagonal matrix.
+       * 
+       * \return multidiagonal matrix view.
+       */
       ViewType getView();
 
+      /**
+       * \brief Returns a non-modifiable view of the multidiagonal matrix.
+       * 
+       * \return multidiagonal matrix view.
+       */
       ConstViewType getConstView() const;
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * The string has a form `Matrices::MultidiagonalMatrix< RealType,  [any_device], IndexType, Organization, [any_allocator], [any_allocator] >`.
+       * 
+       * See \ref MultidiagonalMatrix::getSerializationType.
+       * 
+       * \return \ref String with the serialization type.
+       */
       static String getSerializationType();
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * See \ref MultidiagonalMatrix::getSerializationType.
+       * 
+       * \return \ref String with the serialization type.
+       */
       virtual String getSerializationTypeVirtual() const;
 
+      /**
+       * \brief Returns number of diagonals.
+       * 
+       * \return Number of diagonals.
+       */
       __cuda_callable__
       const IndexType& getDiagonalsCount() const;
 
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_getCompressedRowLengths.out
+       */
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
-      IndexType getNonemptyRowsCount() const;
-
       [[deprecated]]
       IndexType getRowLength( const IndexType row ) const;
 
-      IndexType getMaxRowLength() const;
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       *
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for matrix having all allocated elements set to zero.
+       *
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Comparison operator with another multidiagonal matrix.
+       * 
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * 
+       * \return \e true if both matrices are identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_ >
       bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const;
 
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
+      /**
+       * \brief Comparison operator with another multidiagonal matrix.
+       * 
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * 
+       * \param matrix is the source matrix.
+       * 
+       * \return \e true if both matrices are NOT identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_ >
       bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const;
 
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_getRow.out
+       * 
+       * See \ref MultidiagonalMatrixRowView.
+       */
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_getConstRow.out
+       * 
+       * See \ref MultidiagonalMatrixRowView.
+       */
       __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
+      /**
+       * \brief Set all matrix elements to given value.
+       * 
+       * \param v is the new value of all matrix elements.
+       */
       void setValue( const RealType& v );
 
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_setElement.out
+       */
+      __cuda_callable__
       void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_addElement.out
+       * 
+       */
+      __cuda_callable__
       void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow
+       * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_getElement.out
+       * 
+       */
+      __cuda_callable__
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view );
-
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_rowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero; it must be convertible to FetchValue, the type returned by the Fetch lambda function.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the zero of the given reduction operation, also known as the identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero; it must be convertible to FetchValue, the type returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the zero of the given reduction operation, also known as the identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_allRowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type of \e zero; it must be convertible to FetchValue, the type returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the zero of the given reduction operation, also known as the identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for iteration over matrix rows in the range [first,last) for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function ) const;
 
+      /**
+       * \brief Method for iteration over matrix rows in the range [first,last) for non-constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function );
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       * 
+       * See \ref MultidiagonalMatrixView::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref MultidiagonalMatrixView::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function );
 
-      template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;
-
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param firstRow is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param lastRow is the end of the rows range for which the vector product
+       *    is computed. The default value zero stands for the number of the matrix rows.
+       */
       template< typename InVector,
                 typename OutVector >
       void vectorProduct( const InVector& inVector,
-                          OutVector& outVector ) const;
+                          OutVector& outVector,
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0,
+                          const IndexType firstRow = 0,
+                          IndexType lastRow = 0 ) const;
 
       template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
       void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix,
@@ -153,30 +573,66 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       * 
+       * \param view is the input matrix view for the assignment.
+       * \return reference to this matrix.
+       */
+      MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view );
+
+      /**
+       * \brief Method for saving the matrix to a file.
+       * 
+       * \param file is the output file.
+       */
       void save( File& file ) const;
 
+      /**
+       * \brief Method for saving the matrix to the file with given filename.
+       * 
+       * \param fileName is name of the file.
+       */
       void save( const String& fileName ) const;
 
+      /**
+       * \brief Method for printing the matrix to output stream.
+       * 
+       * \param str is the output stream.
+       */
       void print( std::ostream& str ) const;
 
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       * 
+       * \return constant reference to the indexer.
+       */
       __cuda_callable__
       const IndexerType& getIndexer() const;
 
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       * 
+       * \return non-constant reference to the indexer.
+       */
       __cuda_callable__
       IndexerType& getIndexer();
 
+      /**
+       * \brief Returns padding index denoting padding zero elements.
+       * 
+       * These elements are used for efficient data alignment in memory.
+       * 
+       * \return value of the padding index.
+       */
       __cuda_callable__
       IndexType getPaddingIndex() const;
 
    protected:
 
-      __cuda_callable__
-      IndexType getElementIndex( const IndexType row,
-                                 const IndexType localIdx ) const;
-
-      DiagonalsShiftsView diagonalsShifts;
+      DiagonalsOffsetsView diagonalsOffsets;
 
-      HostDiagonalsShiftsView hostDiagonalsShifts;
+      HostDiagonalsOffsetsView hostDiagonalsOffsets;
 
       IndexerType indexer;
 };
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 8d772b6b9db525731845110ea2b0f703057a5db9..0bc3c2d36de0bfc86de4ad728e1b174b4a287b18 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include <iomanip>
 #include <TNL/Assert.h>
 #include <TNL/Matrices/MultidiagonalMatrixView.h>
 #include <TNL/Exceptions/NotImplementedError.h>
@@ -32,12 +33,12 @@ template< typename Real,
           ElementsOrganization Organization >
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 MultidiagonalMatrixView( const ValuesViewType& values,
-                         const DiagonalsShiftsView& diagonalsShifts,
-                         const HostDiagonalsShiftsView& hostDiagonalsShifts,
+                         const DiagonalsOffsetsView& diagonalsOffsets,
+                         const HostDiagonalsOffsetsView& hostDiagonalsOffsets,
                          const IndexerType& indexer )
 : MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ),
-  diagonalsShifts( diagonalsShifts ),
-  hostDiagonalsShifts( hostDiagonalsShifts ),
+  diagonalsOffsets( diagonalsOffsets ),
+  hostDiagonalsOffsets( hostDiagonalsOffsets ),
   indexer( indexer )
 {
 }
@@ -51,8 +52,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getView() -> ViewType
 {
    return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(),
-                    const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(),
-                    const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->diagonalsOffsets.getView(),
+                    const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsOffsets.getView(),
                     indexer );
 }
 
@@ -65,8 +66,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->values.getConstView(),
-                         this->diagonalsShifts.getConstView(),
-                         this->hostDiagonalsShifts.getConstView(),
+                         this->diagonalsOffsets.getConstView(),
+                         this->hostDiagonalsOffsets.getConstView(),
                          indexer );
 }
 
@@ -78,10 +79,10 @@ String
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getSerializationType()
 {
-   return String( "Matrices::Multidiagonal< " ) +
+   return String( "Matrices::MultidiagonalMatrix< " ) +
           TNL::getSerializationType< RealType >() + ", [any_device], " +
           TNL::getSerializationType< IndexType >() + ", " +
-          ( Organization ? "true" : "false" ) + ", [any_allocator] >";
+          TNL::getSerializationType( Organization ) + ", [any_allocator], [any_allocator] >";
 }
 
 template< typename Real,
@@ -104,7 +105,7 @@ const Index&
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getDiagonalsCount() const
 {
-   return this->diagonalsShifts.getSize();
+   return this->diagonalsOffsets.getSize();
 }
 
 template< typename Real,
@@ -131,17 +132,6 @@ getCompressedRowLengths( Vector& rowLengths ) const
    this->allRowsReduction( fetch, reduce, keep, 0 );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-MultidiagonalMatrixView< Real, Device, Index, Organization >::
-getNonemptyRowsCount() const
-{
-   return this->indexer.getNonemptyRowsCount();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -150,7 +140,7 @@ Index
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getRowLength( const IndexType row ) const
 {
-   return this->diagonalsShifts.getSize();
+   return this->diagonalsOffsets.getSize();
 }
 
 template< typename Real,
@@ -159,18 +149,7 @@ template< typename Real,
           ElementsOrganization Organization >
 Index
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-getMaxRowLength() const
-{
-   return this->diagonalsShifts.getSize();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-MultidiagonalMatrixView< Real, Device, Index, Organization >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
@@ -192,7 +171,7 @@ operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organizatio
       return this->values == matrix.values;
    else
    {
-      TNL_ASSERT( false, "TODO" );
+      TNL_ASSERT_TRUE( false, "TODO" );
    }
 }
 
@@ -217,7 +196,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 setValue( const RealType& v )
 {
    // we dont do this->values = v here because it would set even elements 'outside' the matrix
-   // method getNumberOfNonzeroElements would not well
+   // method getNonzeroElementsCount would not work well then
    const RealType newValue = v;
    auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable {
       value = newValue;
@@ -234,7 +213,7 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
+   return RowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -246,13 +225,14 @@ auto
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
-   return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer );
+   return RowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
+__cuda_callable__
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
@@ -262,17 +242,21 @@ setElement( const IndexType row, const IndexType column, const RealType& value )
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
-      if( row + hostDiagonalsShifts[ i ] == column )
+   for( IndexType i = 0; i < diagonalsOffsets.getSize(); i++ )
+      if( row + diagonalsOffsets.getElement( i ) == column )
       {
-         this->values.setElement( this->getElementIndex( row, i ), value );
+         this->values.setElement( this->indexer.getGlobalIndex( row, i ), value );
          return;
       }
    if( value != 0.0 )
    {
+#ifdef __CUDA_ARCH__
+      TNL_ASSERT_TRUE( false, "" );
+#else
       std::stringstream msg;
       msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
       throw std::logic_error( msg.str() );
+#endif
    }
 }
 
@@ -280,6 +264,7 @@ template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
+__cuda_callable__
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 addElement( const IndexType row,
@@ -292,18 +277,22 @@ addElement( const IndexType row,
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
-      if( row + hostDiagonalsShifts[ i ] == column )
+   for( IndexType i = 0; i < diagonalsOffsets.getSize(); i++ )
+      if( row + diagonalsOffsets.getElement( i ) == column )
       {
-         const Index idx = this->getElementIndex( row, i );
+         const Index idx = this->indexer.getGlobalIndex( row, i );
          this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value );
          return;
       }
    if( value != 0.0 )
    {
+#ifdef __CUDA_ARCH__
+      TNL_ASSERT_TRUE( false, "" );
+#else
       std::stringstream msg;
       msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in multidiagonal matrix.";
       throw std::logic_error( msg.str() );
+#endif
    }
 }
 
@@ -311,6 +300,7 @@ template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
+__cuda_callable__
 Real
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getElement( const IndexType row, const IndexType column ) const
@@ -320,9 +310,9 @@ getElement( const IndexType row, const IndexType column ) const
    TNL_ASSERT_GE( column, 0, "" );
    TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-   for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ )
-      if( row + hostDiagonalsShifts[ i ] == column )
-         return this->values.getElement( this->getElementIndex( row, i ) );
+   for( IndexType localIdx = 0; localIdx < diagonalsOffsets.getSize(); localIdx++ )
+      if( row + diagonalsOffsets.getElement( localIdx ) == column )
+         return this->values.getElement( this->indexer.getGlobalIndex( row, localIdx ) );
    return 0.0;
 }
 
@@ -335,8 +325,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 operator=( const MultidiagonalMatrixView& view )
 {
    MatrixView< Real, Device, Index >::operator=( view );
-   this->diagonalsShifts.bind( view.diagonalsShifts );
-   this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts );
+   this->diagonalsOffsets.bind( view.diagonalsOffsets );
+   this->hostDiagonalsOffsets.bind( view.hostDiagonalsOffsets );
    this->indexer = view.indexer;
    return *this;
 }
@@ -352,8 +342,8 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
-   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
-   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView();
+   const IndexType diagonalsCount = this->diagonalsOffsets.getSize();
    const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    const auto zero = zero_;
@@ -361,7 +351,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
       Real_ sum( zero );
       for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
-         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
             reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) );
       }
@@ -370,6 +360,35 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
+{  // Non-const overload: folds each row with fetch/reduce starting from zero_ and hands the per-row result to keep.
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );  // accumulator type = return type of fetch
+   const auto values_view = this->values.getConstView();  // matrix values are only read here, hence the const view
+   const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView();
+   const IndexType diagonalsCount = this->diagonalsOffsets.getSize();
+   const IndexType columns = this->getColumns();
+   const auto indexer = this->indexer;  // local copy so that the [=] lambda captures the indexer by value
+   const auto zero = zero_;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      Real_ sum( zero );  // every row starts from the supplied initial value
+      for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
+      {
+         const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ];  // column = row + offset of the localIdx-th diagonal
+         if( columnIdx >= 0 && columnIdx < columns )  // skip diagonal entries that fall outside the matrix
+            sum = reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) );  // reduce must return the new accumulated value
+      }
+      keep( rowIdx, sum );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );  // presumably calls f for each rowIdx in [first, last) - see the documented rows range
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -382,6 +401,18 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
    this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+{  // Non-const overload: forwards all arguments to rowsReduction for the whole rows range.
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );  // rows [0, indexer.getNonemptyRowsCount())
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -392,15 +423,15 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
-   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
-   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView();
+   const IndexType diagonalsCount = this->diagonalsOffsets.getSize();
    const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ )
       {
-         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
             function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
       }
@@ -418,15 +449,15 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
-   const auto diagonalsShifts_view = this->diagonalsShifts.getConstView();
-   const IndexType diagonalsCount = this->diagonalsShifts.getSize();
+   const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView();
+   const IndexType diagonalsCount = this->diagonalsOffsets.getSize();
    const IndexType columns = this->getColumns();
    const auto indexer = this->indexer;
    bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ )
       {
-         const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ];
+         const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ];
          if( columnIdx >= 0 && columnIdx < columns )
             function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute );
       }
@@ -458,18 +489,6 @@ forAllRows( Function& function )
    this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-template< typename Vector >
-__cuda_callable__
-typename Vector::RealType 
-MultidiagonalMatrixView< Real, Device, Index, Organization >::
-rowVectorProduct( const IndexType row, const Vector& vector ) const
-{
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -478,7 +497,12 @@ template< typename Real,
              typename OutVector >
 void 
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-vectorProduct( const InVector& inVector, OutVector& outVector ) const
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator,
+               const IndexType begin,
+               IndexType end ) const
 {
    TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
@@ -491,10 +515,19 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
       sum += value;
    };
-   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
-      outVectorView[ row ] = value;
+   auto keeper1 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = matrixMultiplicator * value;
+   };
+   auto keeper2 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
    };
-   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+
+   if( end == 0 )
+      end = this->getRows();
+   if( outVectorMultiplicator == ( RealType ) 0.0 )
+      this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
+   else
+      this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -670,14 +703,18 @@ void MultidiagonalMatrixView< Real, Device, Index, Organization >::print( std::o
    for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ )
    {
       str <<"Row: " << rowIdx << " -> ";
-      for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ )
+      for( IndexType localIdx = 0; localIdx < this->hostDiagonalsOffsets.getSize(); localIdx++ )
       {
-         const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ];
+         const IndexType columnIdx = rowIdx + this->hostDiagonalsOffsets[ localIdx ];
          if( columnIdx >= 0 && columnIdx < this->columns )
          {
-            auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) );
-            if( v )
-               str << " Col:" << columnIdx << "->" << v  << "\t";
+            auto value = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) );
+            if( value )
+            {
+               std::stringstream str_;
+               str_ << std::setw( 4 ) << std::right << columnIdx << ":" << std::setw( 4 ) << std::left << value;
+               str << std::setw( 10 ) << str_.str();
+            }
          }
       }
       str << std::endl;
@@ -708,7 +745,7 @@ getIndexer() -> IndexerType&
    return this->indexer;
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
@@ -718,7 +755,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
    return this->indexer.getGlobalIndex( row, localIdx );
-}
+}*/
 
 template< typename Real,
           typename Device,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 5986431374dbdab46cc7cc8b2950858c08d33579..268af8a332dafb066d6061021b193c06dbf5ddeb 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -22,6 +22,23 @@
 namespace TNL {
 namespace Matrices {
 
+/**
+ * \brief Implementation of sparse matrix, i.e. matrix storing only non-zero elements.
+ * 
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam MatrixType specifies the type of matrix - its symmetry or binarity. See \ref MatrixType.
+ *    Both symmetric and binary matrix types reduce memory consumption. Binary matrix does not store
+ *    the matrix values explicitly since the non-zero elements can have only value equal to one. Symmetric
+ *    matrices store only lower part of the matrix and its diagonal. The upper part is reconstructed on the fly.
+ *    GeneralMatrix with no symmetry is used by default.
+ * \tparam Segments is a structure representing the sparse matrix format. Depending on the pattern of the non-zero elements
+ *    different matrix formats can perform differently especially on GPUs. By default \ref CSR format is used. See also
+ *    \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack.
+ * \tparam RealAllocator is allocator for the matrix elements values.
+ * \tparam IndexAllocator is allocator for the matrix elements column indexes.
+ */
 template< typename Real,
           typename Device = Devices::Host,
           typename Index = int,
@@ -31,43 +48,114 @@ template< typename Real,
           typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
 class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
+   static_assert(
+         ! MatrixType::isSymmetric() ||
+         ! std::is_same< Device, Devices::Cuda >::value ||
+         ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+         "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+
    public:
+
+      // Supporting types - they are not important for the user
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+      using ColumnsIndexesVectorType = Containers::Vector< Index, Device, Index, IndexAllocator >;
+      using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
+      using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
+      using RowsCapacitiesType = Containers::Vector< Index, Device, Index, IndexAllocator >;
+      using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
+
+      /**
+       * \brief Test of symmetric matrix type.
+       * 
+       * \return \e true if the matrix is stored as symmetric and \e false otherwise.
+       */
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
-      static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
-      static_assert(
-            ! isSymmetric() ||
-            ! std::is_same< Device, Devices::Cuda >::value ||
-            ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
-            "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+      /**
+       * \brief Test of binary matrix type.
+       * 
+       * \return \e true if the matrix is stored as binary and \e false otherwise.
+       */
+      static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
+      using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief Templated type of segments, i.e. sparse matrix format.
+       */
       template< typename Device_, typename Index_, typename IndexAllocator_ >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
+
+      /**
+       * \brief Type of segments used by this matrix. It represents the sparse matrix format.
+       */
       using SegmentsType = Segments< Device, Index, IndexAllocator >;
+
+      /**
+       * \brief Templated view type of segments, i.e. sparse matrix format.
+       */
       template< typename Device_, typename Index_ >
       using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >;
+
+      /**
+       * \brief Type of segments view used by the related matrix view. It represents the sparse matrix format.
+       */
       using SegmentsViewType = typename SegmentsType::ViewType;
-      using SegmentViewType = typename SegmentsType::SegmentViewType;
-      using DeviceType = Device;
-      using IndexType = Index;
+
+      /**
+       * \brief The allocator for matrix elements values.
+       */
       using RealAllocatorType = RealAllocator;
+
+      /**
+       * \brief The allocator for matrix elements column indexes.
+       */
       using IndexAllocatorType = IndexAllocator;
-      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
-      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
-      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
-      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType;
-      using ValuesViewType = typename ValuesVectorType::ViewType;
-      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
-      using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >;
-      using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
-      using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
+
+      /**
+       * \brief Type of related matrix view. 
+       * 
+       * See \ref SparseMatrixView.
+       */
       using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       * 
+       * See \ref SparseMatrixView.
+       */
+      using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >;
+
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = SparseMatrixRowView< typename SegmentsType::SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
       using ConstRowView = typename RowView::ConstViewType;
 
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
       template< typename _Real = Real,
                 typename _Device = Device,
                 typename _Index = Index,
@@ -77,107 +165,606 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                 typename _IndexAllocator = typename Allocators::Default< _Device >::template Allocator< _Index > >
       using Self = SparseMatrix< _Real, _Device, _Index, _MatrixType, _Segments, _RealAllocator, _IndexAllocator >;
 
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-
+      /**
+       * \brief Constructor only with values and column indexes allocators.
+       * 
+       * \param realAllocator is used for allocation of matrix elements values.
+       * \param indexAllocator is used for allocation of matrix elements column indexes.
+       */
       SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
-      SparseMatrix( const SparseMatrix& m ) = default;
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param matrix1 is the source matrix
+       */
+      SparseMatrix( const SparseMatrix& matrix1 ) = default;
 
-      SparseMatrix( SparseMatrix&& m ) = default;
+      /**
+       * \brief Move constructor.
+       * 
+       * \param matrix is the source matrix
+       */
+      SparseMatrix( SparseMatrix&& matrix ) = default;
 
+      /**
+       * \brief Constructor with matrix dimensions.
+       * 
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param realAllocator is used for allocation of matrix elements values.
+       * \param indexAllocator is used for allocation of matrix elements column indexes.
+       */
       SparseMatrix( const IndexType rows,
                     const IndexType columns,
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
-      SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
-                    const IndexType columns,
-                    const RealAllocatorType& realAllocator = RealAllocatorType(),
-                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+      /**
+       * \brief Constructor with matrix rows capacities and number of columns.
+       * 
+       * The number of matrix rows is given by the size of \e rowCapacities list.
+       * 
+       * \tparam ListIndex is the initializer list values type.
+       * \param rowCapacities is a list telling how many matrix elements must be
+       *    allocated in each row.
+       * \param columns is the number of matrix columns.
+       * \param realAllocator is used for allocation of matrix elements values.
+       * \param indexAllocator is used for allocation of matrix elements column indexes.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp
+       * \par Output
+       * \include SparseMatrixExample_Constructor_init_list_1.out
+       */
+      template< typename ListIndex >
+      explicit SparseMatrix( const std::initializer_list< ListIndex >& rowCapacities,
+                             const IndexType columns,
+                             const RealAllocatorType& realAllocator = RealAllocatorType(),
+                             const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
-      SparseMatrix( const IndexType rows,
-                    const IndexType columns,
-                    const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
-                    const RealAllocatorType& realAllocator = RealAllocatorType(),
-                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
+      /**
+       * \brief Constructor with matrix dimensions and data in initializer list.
+       * 
+       * The matrix elements values are given as a list \e data of triples:
+       * { { row1, column1, value1 },
+       *   { row2, column2, value2 },
+       * ... }.
+       * 
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param data is a list of matrix elements values.
+       * \param realAllocator is used for allocation of matrix elements values.
+       * \param indexAllocator is used for allocation of matrix elements column indexes.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
+       * \par Output
+       * \include SparseMatrixExample_Constructor_init_list_2.out
+       */
+      explicit SparseMatrix( const IndexType rows,
+                             const IndexType columns,
+                             const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data,
+                             const RealAllocatorType& realAllocator = RealAllocatorType(),
+                             const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
+      /**
+       * \brief Constructor with matrix dimensions and data in std::map.
+       * 
+       * The matrix elements values are given as a map \e map where keys are
+       * std::pair of matrix coordinates ( {row, column} ) and value is the
+       * matrix element value.
+       * 
+       * \tparam MapIndex is a type for indexing rows and columns.
+       * \tparam MapValue is a type for matrix elements values in the map.
+       * 
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param map is std::map containing matrix elements.
+       * \param realAllocator is used for allocation of matrix elements values.
+       * \param indexAllocator is used for allocation of matrix elements column indexes.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
+       * \par Output
+       * \include SparseMatrixExample_Constructor_std_map.out
+       */
       template< typename MapIndex,
                 typename MapValue >
       explicit SparseMatrix( const IndexType rows,
                              const IndexType columns,
-                             const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map );
-
-      virtual void setDimensions( const IndexType rows,
-                                  const IndexType columns ) override;
+                             const std::map< std::pair< MapIndex, MapIndex >, MapValue >& map,
+                             const RealAllocatorType& realAllocator = RealAllocatorType(),
+                             const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
+      /**
+       * \brief Returns a modifiable view of the sparse matrix.
+       * 
+       * See \ref SparseMatrixView.
+       * 
+       * \return sparse matrix view.
+       */
       ViewType getView() const; // TODO: remove const
 
+      /**
+       * \brief Returns a non-modifiable view of the sparse matrix.
+       * 
+       * See \ref SparseMatrixView.
+       * 
+       * \return sparse matrix view.
+       */
       ConstViewType getConstView() const;
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * The string has a form `Matrices::SparseMatrix< RealType,  [any_device], IndexType, General/Symmetric, Format, [any_allocator] >`.
+       * 
+       * \return \ref String with the serialization type.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include SparseMatrixExample_getSerializationType.out
+       */
       static String getSerializationType();
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * See \ref SparseMatrix::getSerializationType.
+       * 
+       * \return \e String with the serialization type.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include SparseMatrixExample_getSerializationType.out
+       */
       virtual String getSerializationTypeVirtual() const;
 
+      /**
+       * \brief Set number of rows and columns of this matrix.
+       * 
+       * \param rows is the number of matrix rows.
+       * \param columns is the number of matrix columns.
+       */
+      virtual void setDimensions( const IndexType rows,
+                                  const IndexType columns ) override;
+
+      /**
+       * \brief Set the number of matrix rows and columns by the given matrix.
+       * 
+       * \tparam Matrix is matrix type. This can be any matrix having methods 
+       *  \ref getRows and \ref getColumns.
+       * 
+       * \param matrix is the input matrix whose dimensions are to be adopted.
+       */
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
+
+      /**
+       * \brief Allocates memory for non-zero matrix elements.
+       * 
+       * The size of the input vector must be equal to the number of matrix rows.
+       * The number of allocated matrix elements for each matrix row depends on
+       * the sparse matrix format. Some formats may allocate more elements than
+       * required.
+       * 
+       * \tparam RowsCapacitiesVector is a type of vector/array used for row
+       *    capacities setting.
+       * 
+       * \param rowCapacities is a vector telling the number of required non-zero
+       *    matrix elements in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp
+       * \par Output
+       * \include SparseMatrixExample_setRowCapacities.out
+       */
       template< typename RowsCapacitiesVector >
       void setRowCapacities( const RowsCapacitiesVector& rowCapacities );
 
       // TODO: Remove this when possible
       template< typename RowsCapacitiesVector >
+      [[deprecated]]
       void setCompressedRowLengths( const RowsCapacitiesVector& rowLengths ) {
          this->setRowCapacities( rowLengths );
       };
 
+      /**
+       * \brief This method sets the sparse matrix elements from initializer list.
+       * 
+       * The number of matrix rows and columns must be set already.
+       * The matrix elements values are given as a list \e data of triples:
+       * { { row1, column1, value1 },
+       *   { row2, column2, value2 },
+       * ... }.
+       * 
+       * \param data is an initializer list of triples ( row, column, value )
+       * representing the matrix elements.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp
+       * \par Output
+       * \include SparseMatrixExample_setElements.out
+       */
       void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data );
 
+      /**
+       * \brief This method sets the sparse matrix elements from std::map.
+       * 
+       * The matrix elements values are given as a map \e map where keys are
+       * std::pair of matrix coordinates ( {row, column} ) and value is the
+       * matrix element value.
+       * 
+       * \tparam MapIndex is a type for indexing rows and columns.
+       * \tparam MapValue is a type for matrix elements values in the map.
+       * 
+       * \param map is std::map containing matrix elements.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp
+       * \par Output
+       * \include SparseMatrixExample_setElements_map.out
+       */
       template< typename MapIndex,
                 typename MapValue >
       void setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map );
 
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include SparseMatrixExample_getCompressedRowLengths.out
+       */
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
+      /**
+       * \brief Returns capacity of given matrix row.
+       * 
+       * \param row index of matrix row.
+       * \return number of matrix elements allocated for the row.
+       */
       __cuda_callable__
       IndexType getRowCapacity( const IndexType row ) const;
 
-      template< typename Matrix >
-      void setLike( const Matrix& matrix );
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       * 
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for matrix having all allocated elements set to zero.
+       * 
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
 
+      /**
+       * \brief Resets the matrix to zero dimensions.
+       */
       void reset();
 
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp
+       * \par Output
+       * \include SparseMatrixExample_getConstRow.out
+       * 
+       * See \ref SparseMatrixRowView.
+       */
       __cuda_callable__
       const ConstRowView getRow( const IndexType& rowIdx ) const;
 
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp
+       * \par Output
+       * \include SparseMatrixExample_getRow.out
+       * 
+       * See \ref SparseMatrixRowView.
+       */
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp
+       * \par Output
+       * \include SparseMatrixExample_setElement.out
+       */
       __cuda_callable__
       void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
+       * \par Output
+       * \include SparseMatrixExample_addElement.out
+       * 
+       */
       __cuda_callable__
       void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator );
 
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device and
+       * this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp
+       * \par Output
+       * \include SparseMatrixExample_getElement.out
+       * 
+       */
       __cuda_callable__
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
-      /*template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;*/
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-      /***
-       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted FetchValue above).
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as identity (neutral) element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted \e FetchValue above).
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for iteration over all matrix rows for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
+
+      /**
+       * \brief Method for iteration over all matrix rows for non-constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function );
+
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       * 
+       * See \ref SparseMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref SparseMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. It is the number of matrix rows by default.
        */
       template< typename InVector,
                 typename OutVector >
@@ -198,24 +785,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                              const RealType& matrixMultiplicator = 1.0 );
        */
 
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
-
-      template< typename Function >
-      void forAllRows( Function& function ) const;
-
-      template< typename Function >
-      void forAllRows( Function& function );
-
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
@@ -224,51 +793,116 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       /**
        * \brief Assignment of exactly the same matrix type.
-       * @param matrix
-       * @return
+       * 
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
        */
       SparseMatrix& operator=( const SparseMatrix& matrix );
 
       /**
        * \brief Assignment of dense matrix
+       * 
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
        */
       template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization, typename RealAllocator_ >
       SparseMatrix& operator=( const DenseMatrix< Real_, Device_, Index_, Organization, RealAllocator_ >& matrix );
 
 
       /**
-       * \brief Assignment of any other matrix type.
-       * @param matrix
-       * @return
+       * \brief Assignment of any matrix type other than this one and dense.
+       * 
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
        */
       template< typename RHSMatrix >
       SparseMatrix& operator=( const RHSMatrix& matrix );
 
+      /**
+       * \brief Comparison operator with another arbitrary matrix type.
+       * 
+       * \param m is the right-hand side matrix.
+       * \return \e true if the RHS matrix is equal, \e false otherwise.
+       */
       template< typename Matrix >
       bool operator==( const Matrix& m ) const;
 
+      /**
+       * \brief Inequality comparison operator with another arbitrary matrix type.
+       * 
+       * \param m is the right-hand side matrix.
+       * \return \e true if the RHS matrix is not equal, \e false otherwise.
+       */
       template< typename Matrix >
       bool operator!=( const Matrix& m ) const;
 
-      void save( File& file ) const;
-
-      void load( File& file );
-
+      /**
+       * \brief Method for saving the matrix to the file with given filename.
+       * 
+       * \param fileName is name of the file.
+       */
       void save( const String& fileName ) const;
 
+      /**
+       * \brief Method for loading the matrix from the file with given filename.
+       * 
+       * \param fileName is name of the file.
+       */
       void load( const String& fileName );
 
+      /**
+       * \brief Method for saving the matrix to a file.
+       * 
+       * \param file is the file where the matrix will be saved.
+       */
+      void save( File& file ) const;
+
+      /**
+       * \brief Method for loading the matrix from a file.
+       * 
+       * \param file is the file from which the matrix will be loaded.
+       */
+      void load( File& file );
+
+      /**
+       * \brief Method for printing the matrix to output stream.
+       * 
+       * \param str is the output stream.
+       */
       void print( std::ostream& str ) const;
 
+      /**
+       * \brief Returns a padding index value.
+       * 
+       * Padding index is used for column indexes of padding zeros. Padding zeros
+       * are used in some sparse matrix formats for better data alignment in memory.
+       * 
+       * \return value of the padding index.
+       */
       __cuda_callable__
       IndexType getPaddingIndex() const;
 
+      /**
+       * \brief Getter of segments for non-constant instances.
+       * 
+       * \e Segments are a structure for addressing the matrix elements columns and values.
+       * In fact, \e Segments represent the sparse matrix format.
+       * 
+       * \return Non-constant reference to segments.
+       */
       SegmentsType& getSegments();
 
+      /**
+       * \brief Getter of segments for constant instances.
+       * 
+       * \e Segments are a structure for addressing the matrix elements columns and values.
+       * In fact, \e Segments represent the sparse matrix format.
+       * 
+       * \return Constant reference to segments.
+       */
       const SegmentsType& getSegments() const;
 
-// TODO: restore it and also in Matrix
-//   protected:
+   protected:
 
       ColumnsIndexesVectorType columnIndexes;
 
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index d13537cefaf3aaf409cd1ecc91f1f09ad7cf2e8d..d60d49611dbfff4cad01049d8fdc4ed7657b19cc 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -57,14 +57,15 @@ template< typename Real,
           template< typename, typename, typename > class Segments,
           typename RealAllocator,
           typename IndexAllocator >
+   template< typename ListIndex >
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-SparseMatrix( const std::initializer_list< IndexType >& rowCapacities,
+SparseMatrix( const std::initializer_list< ListIndex >& rowCapacities,
               const IndexType columns,
               const RealAllocatorType& realAllocator,
               const IndexAllocatorType& indexAllocator )
 : BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator )
 {
-   this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) );
+   this->setRowCapacities( RowsCapacitiesType( rowCapacities ) );
 }
 
 template< typename Real,
@@ -97,29 +98,15 @@ template< typename Real,
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 SparseMatrix( const IndexType rows,
               const IndexType columns,
-              const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map )
+              const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map,
+              const RealAllocatorType& realAllocator,
+              const IndexAllocatorType& indexAllocator )
+: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator )
 {
    this->setDimensions( rows, columns );
    this->setElements( map );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-void
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setDimensions( const IndexType rows,
-               const IndexType columns )
-{
-   BaseType::setDimensions( rows, columns );
-   segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( rows, 0 ) );
-   this->view = this->getView();
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -167,10 +154,7 @@ String
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
 getSerializationType()
 {
-   return String( "Matrices::SparseMatrix< " ) +
-             TNL::getSerializationType< RealType >() + ", " +
-             TNL::getSerializationType< SegmentsType >() + ", [any_device], " +
-             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+   return ViewType::getSerializationType();
 }
 
 template< typename Real,
@@ -187,6 +171,41 @@ getSerializationTypeVirtual() const
    return this->getSerializationType();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setDimensions( const IndexType rows,
+               const IndexType columns )
+{
+   BaseType::setDimensions( rows, columns ); // forward the new dimensions to the base-class implementation
+   segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( rows, 0 ) ); // presumably one zero-sized segment per row - confirm against the Vector( size, value ) constructor
+   this->view = this->getView(); // re-create the stored view after the segments were reset
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Matrix_ >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+setLike( const Matrix_& matrix )
+{
+   BaseType::setLike( matrix ); // forward to the base-class implementation first
+   this->segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( matrix.getRows(), 0 ) ); // segment sizes built from ( matrix.getRows(), 0 ); was terminated by ',' (comma operator) instead of ';'
+   this->view = this->getView(); // re-create the stored view after the segments were reset
+   TNL_ASSERT_EQ( this->getRows(), segments.getSegmentsCount(), "mismatched segments count" );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -320,24 +339,6 @@ getRowCapacity( const IndexType row ) const
    return this->view.getRowCapacity( row );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-   template< typename Matrix_ >
-void
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-setLike( const Matrix_& matrix )
-{
-   BaseType::setLike( matrix );
-   this->segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( matrix.getRows(), 0 ) ),
-   this->view = this->getView();
-   TNL_ASSERT_EQ( this->getRows(), segments.getSegmentsCount(), "mismatched segments count" );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -347,9 +348,9 @@ template< typename Real,
           typename IndexAllocator >
 Index
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
-   return this->view.getNumberOfNonzeroMatrixElements();
+   return this->view.getNonzeroElementsCount();
 }
 
 template< typename Real,
@@ -446,23 +447,6 @@ getElement( const IndexType row,
    return this->view.getElement( row, column );
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename, typename > class Segments,
-          typename RealAllocator,
-          typename IndexAllocator >
-   template< typename Vector >
-__cuda_callable__
-typename Vector::RealType
-SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-rowVectorProduct( const IndexType row,
-                  const Vector& vector ) const
-{
-   return this->view.rowVectorProduct( row, vector );
-}*/
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -494,9 +478,39 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+{
+   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); // delegate to rowsReduction over the row range [ 0, getRows() )
+}
 
 template< typename Real,
@@ -524,9 +538,9 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-forRows( IndexType first, IndexType last, Function& function ) const
+forRows( IndexType begin, IndexType end, Function& function ) const
 {
-   this->view.forRows( first, last, function );
+   this->view.forRows( begin, end, function );
 }
 
 template< typename Real,
@@ -539,9 +553,9 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-forRows( IndexType first, IndexType last, Function& function )
+forRows( IndexType begin, IndexType end, Function& function )
 {
-   this->view.forRows( first, last, function );
+   this->view.forRows( begin, end, function );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
index c859655ef5ba00fa5ed759e01b6a126dd4fd2324..71555ab399329cb3037216ad9de280def575a871 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.h
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -17,6 +17,26 @@
 namespace TNL {
 namespace Matrices {
 
+/**
+ * \brief RowView is a simple structure for accessing rows of sparse matrix.
+ * 
+ * \tparam SegmentView is a segment view of segments representing the matrix format.
+ * \tparam ValuesView is a vector view storing the matrix elements values.
+ * \tparam ColumnsIndexesView is a vector view storing the column indexes of the matrix element.
+ * \tparam isBinary tells if the parent matrix is a binary matrix.
+ * 
+ * See \ref SparseMatrix and \ref SparseMatrixView.
+ * 
+ * \par Example
+ * \include Matrices/SparseMatrixExample_getRow.cpp
+ * \par Output
+ * \include SparseMatrixExample_getRow.out
+ * 
+ * \par Example
+ * \include Matrices/SparseMatrixViewExample_getRow.cpp
+ * \par Output
+ * \include SparseMatrixViewExample_getRow.out
+ */
 template< typename SegmentView,
           typename ValuesView,
           typename ColumnsIndexesView,
@@ -25,46 +45,152 @@ class SparseMatrixRowView
 {
    public:
 
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = typename ValuesView::RealType;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = typename ColumnsIndexesView::IndexType;
+
+      /**
+       * \brief Type representing matrix row format.
+       */
       using SegmentViewType = SegmentView;
-      using IndexType = typename SegmentViewType::IndexType;
+
+      /**
+       * \brief Type of container view used for storing the matrix elements values.
+       */
       using ValuesViewType = ValuesView;
+
+      /**
+       * \brief Type of container view used for storing the column indexes of the matrix elements.
+       */
       using ColumnsIndexesViewType = ColumnsIndexesView;
+
+      /**
+       * \brief Type of constant container view used for storing the matrix elements values.
+       */
       using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+
+      /**
+       * \brief Type of constant container view used for storing the column indexes of the matrix elements.
+       */
       using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
+
+      /**
+       * \brief Type of constant sparse matrix row view.
+       */
       using ConstViewType = SparseMatrixRowView< SegmentView, ConstValuesViewType, ConstColumnsIndexesViewType, isBinary_ >;
 
+      /**
+       * \brief Tells whether the parent matrix is a binary matrix.
+       * \return \e true if the parent matrix is binary, \e false otherwise.
+       */
       static constexpr bool isBinary() { return isBinary_; };
 
+      /**
+       * \brief Constructor with \e segmentView, \e values and \e columnIndexes.
+       * 
+       * \param segmentView instance of SegmentViewType representing matrix row.
+       * \param values is a container view for storing the matrix elements values.
+       * \param columnIndexes is a container view for storing the column indexes of the matrix elements.
+       */
       __cuda_callable__
       SparseMatrixRowView( const SegmentViewType& segmentView,
                            const ValuesViewType& values,
                            const ColumnsIndexesViewType& columnIndexes );
 
+      /**
+       * \brief Returns size of the matrix row, i.e. number of matrix elements in this row.
+       * 
+       * \return Size of the matrix row.
+       */
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Returns constant reference to a column index of an element with given rank in the row.
+       * 
+       * \param localIdx is the rank of the non-zero element in given row.
+       * 
+       * \return constant reference to the matrix element column index.
+       */
       __cuda_callable__
       const IndexType& getColumnIndex( const IndexType localIdx ) const;
 
+      /**
+       * \brief Returns non-constant reference to a column index of an element with given rank in the row.
+       * 
+       * \param localIdx is the rank of the non-zero element in given row.
+       * 
+       * \return non-constant reference to the matrix element column index.
+       */
       __cuda_callable__
       IndexType& getColumnIndex( const IndexType localIdx );
 
+      /**
+       * \brief Returns constant reference to value of an element with given rank in the row.
+       * 
+       * \param localIdx is the rank of the non-zero element in given row.
+       * 
+       * \return constant reference to the matrix element value.
+       */
       __cuda_callable__
       const RealType& getValue( const IndexType localIdx ) const;
 
+      /**
+       * \brief Returns non-constant reference to value of an element with given rank in the row.
+       * 
+       * \param localIdx is the rank of the non-zero element in given row.
+       * 
+       * \return non-constant reference to the matrix element value.
+       */
       __cuda_callable__
       RealType& getValue( const IndexType localIdx );
 
+      /**
+       * \brief Sets a value of matrix element with given rank in the matrix row.
+       * 
+       * \param localIdx is the rank of the matrix element in the row.
+       * \param value is the new value of the matrix element.
+       */
       __cuda_callable__
       void setValue( const IndexType localIdx,
                      const RealType& value );
 
+      /**
+       * \brief Sets a column index of matrix element with given rank in the matrix row.
+       * 
+       * \param localIdx is the rank of the matrix element in the row.
+       * \param columnIndex is the new column index of the matrix element.
+       */
+      __cuda_callable__
+      void setColumnIndex( const IndexType localIdx,
+                           const IndexType& columnIndex );
+
+      /**
+       * \brief Sets both a value and a column index of matrix element with given rank in the matrix row.
+       * 
+       * \param localIdx is the rank of the matrix element in the row.
+       * \param columnIndex is the new column index of the matrix element.
+       * \param value is the new value of the matrix element.
+       */
       __cuda_callable__
       void setElement( const IndexType localIdx,
-                       const IndexType column,
+                       const IndexType columnIndex,
                        const RealType& value );
 
+      /**
+       * \brief Comparison of two matrix rows.
+       * 
+       * The other matrix row can be from any other matrix.
+       * 
+       * \param other is another matrix row.
+       * \return \e true if both rows are the same, \e false otherwise.
+       */
       template< typename _SegmentView,
                 typename _ValuesView,
                 typename _ColumnsIndexesView,
@@ -81,6 +207,13 @@ class SparseMatrixRowView
       ColumnsIndexesViewType columnIndexes;
 };
 
+/**
+ * \brief Insertion operator for a sparse matrix row.
+ * 
+ * \param str is an output stream.
+ * \param row is an input sparse matrix row.
+ * \return  reference to the output stream.
+ */
 template< typename SegmentView,
           typename ValuesView,
           typename ColumnsIndexesView,
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
index 545e395fc309cc193fe6f0ed58bff92f7218a6de..a0a9c2604b8ee61d4fa7334f4b550756ea7fffcf 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.hpp
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -140,7 +140,7 @@ operator==( const SparseMatrixRowView< _SegmentView, _ValuesView, _ColumnsIndexe
    while( i < getSize() && i < other.getSize() ) {
       if( getColumnIndex( i ) != other.getColumnIndex( i ) )
          return false;
-      if( getValue( i ) != other.getValue( i ) )
+      if( ! _isBinary && getValue( i ) != other.getValue( i ) )
          return false;
       ++i;
    }
@@ -163,7 +163,11 @@ std::ostream& operator<<( std::ostream& str, const SparseMatrixRowView< SegmentV
 {
    using NonConstIndex = std::remove_const_t< typename SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::IndexType >;
    for( NonConstIndex i = 0; i < row.getSize(); i++ )
-      str << " [ " << row.getColumnIndex( i ) << " ] = " << row.getValue( i ) << ", ";
+      if( isBinary_ )
+         // TODO: check getPaddingIndex(), print only the column indices of non-zeros but not the values
+         str << " [ " << row.getColumnIndex( i ) << " ] = " << (row.getColumnIndex( i ) >= 0) << ", ";
+      else
+         str << " [ " << row.getColumnIndex( i ) << " ] = " << row.getValue( i ) << ", ";
    return str;
 }
 
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index 6bba7d4144618a41704d74ec08f91be04a1d2416..d80db7717bb6f77480da546f567d7a3f808a3268 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -19,6 +19,24 @@
 namespace TNL {
 namespace Matrices {
 
+/**
+ * \brief Implementation of sparse matrix view.
+ *
+ * It serves as an accessor to \ref SparseMatrix for example when passing the
+ * matrix to lambda functions. SparseMatrix view can be also created in CUDA kernels.
+ * 
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam MatrixType specifies the type of matrix - its symmetry or binarity. See \ref MatrixType.
+ *    Both symmetric and binary matrix types reduce memory consumption. Binary matrix does not store
+ *    the matrix values explicitly since the non-zero elements can have only value equal to one. Symmetric
+ *    matrices store only lower part of the matrix and its diagonal. The upper part is reconstructed on the fly.
+ *    GeneralMatrix with no symmetry is used by default.
+ * \tparam Segments is a structure representing the sparse matrix format. Depending on the pattern of the non-zero elements
+ *    different matrix formats can perform differently especially on GPUs. By default \ref CSR format is used. See also
+ *    \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack.
+ */
 template< typename Real,
           typename Device = Devices::Host,
           typename Index = int,
@@ -26,37 +44,108 @@ template< typename Real,
           template< typename Device_, typename Index_ > class SegmentsView = Containers::Segments::CSRView >
 class SparseMatrixView : public MatrixView< Real, Device, Index >
 {
+   static_assert(
+      ! MatrixType::isSymmetric() ||
+      ! std::is_same< Device, Devices::Cuda >::value ||
+      ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+      "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+
    public:
+
+      // Supporting types - they are not important for the user
+      using BaseType = MatrixView< Real, Device, Index >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+      using ColumnsIndexesViewType = Containers::VectorView< Index, Device, Index >;
+      using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
+      using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >;
+      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
+
+      /**
+       * \brief Test of symmetric matrix type.
+       * 
+       * \return \e true if the matrix is stored as symmetric and \e false otherwise.
+       */
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
+
+      /**
+       * \brief Test of binary matrix type.
+       * 
+       * \return \e true if the matrix is stored as binary and \e false otherwise.
+       */
       static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
+      using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief Templated type of segments view, i.e. sparse matrix format.
+       */
       template< typename Device_, typename Index_ >
       using SegmentsViewTemplate = SegmentsView< Device_, Index_ >;
+
+      /**
+       * \brief Type of segments view used by this matrix. It represents the sparse matrix format.
+       */
       using SegmentsViewType = SegmentsView< Device, Index >;
-      using SegmentViewType = typename SegmentsViewType::SegmentViewType;
-      using DeviceType = Device;
-      using IndexType = Index;
-      using BaseType = MatrixView< Real, Device, Index >;
-      using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >;
-      using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
-      using ValuesViewType = typename BaseType::ValuesView;
-      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
-      using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >;
-      using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
-      using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >;
-      using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
-      using ConstRowView = typename RowView::ConstViewType;
 
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+      /**
+       * \brief Type of related matrix view. 
+       */
+      using ViewType = SparseMatrixView< std::remove_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       */
+      using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, std::add_const_t< Index >, MatrixType, SegmentsViewTemplate >;
+
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = SparseMatrixRowView< typename SegmentsViewType::SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
+      using ConstRowView = typename RowView::ConstViewType;
 
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                typename _MatrixType = MatrixType,
+                template< typename, typename > class _SegmentsView = SegmentsView >
+      using Self = SparseMatrixView< _Real, _Device, _Index, _MatrixType, _SegmentsView >;
+
+      /**
+       * \brief Constructor with no parameters.
+       */
       __cuda_callable__
       SparseMatrixView();
 
+      /**
+       * \brief Constructor with all necessary data and views.
+       * 
+       * \param rows is a number of matrix rows.
+       * \param columns is a number of matrix columns.
+       * \param values is a vector view with matrix elements values.
+       * \param columnIndexes is a vector view with matrix elements column indexes.
+       * \param segments is a segments view representing the sparse matrix format.
+       */
       __cuda_callable__
       SparseMatrixView( const IndexType rows,
                         const IndexType columns,
@@ -64,88 +153,423 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
                         const ColumnsIndexesViewType& columnIndexes,
                         const SegmentsViewType& segments );
 
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param matrix is an input sparse matrix view.
+       */
       __cuda_callable__
-      SparseMatrixView( const SparseMatrixView& m ) = default;
+      SparseMatrixView( const SparseMatrixView& matrix ) = default;
 
-      //__cuda_callable__
-      //SparseMatrixView( const SparseMatrixView&& m ) = default;
+      /**
+       * \brief Move constructor.
+       * 
+       * \param matrix is an input sparse matrix view.
+       */
+      __cuda_callable__
+      SparseMatrixView( SparseMatrixView&& matrix ) = default;
 
+      /**
+       * \brief Returns a modifiable view of the sparse matrix.
+       * 
+       * \return sparse matrix view.
+       */
       __cuda_callable__
       ViewType getView();
 
+      /**
+       * \brief Returns a non-modifiable view of the sparse matrix.
+       * 
+       * \return sparse matrix view.
+       */
       __cuda_callable__
       ConstViewType getConstView() const;
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * The string has the form `Matrices::SparseMatrix< RealType, Format, [any_device], IndexType, MatrixType, [any_allocator], [any_allocator] >`.
+       * 
+       * \return \ref String with the serialization type.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_getSerializationType.out
+       */
       static String getSerializationType();
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * See \ref SparseMatrix::getSerializationType.
+       * 
+       * \return \e String with the serialization type.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include SparseMatrixExample_getSerializationType.out
+       */
       virtual String getSerializationTypeVirtual() const;
 
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_getCompressedRowLengths.out
+       */
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
+      /**
+       * \brief Returns capacity of given matrix row.
+       * 
+       * \param row index of matrix row.
+       * \return number of matrix elements allocated for the row.
+       */
       __cuda_callable__
       IndexType getRowCapacity( const IndexType row ) const;
 
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
-      void reset();
-
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       * 
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for matrix having all allocated elements set to zero.
+       * 
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_getConstRow.out
+       * 
+       * See \ref SparseMatrixRowView.
+       */
       __cuda_callable__
       ConstRowView getRow( const IndexType& rowIdx ) const;
 
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_getRow.out
+       * 
+       * See \ref SparseMatrixRowView.
+       */
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from the CPU, it transfers each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_setElement.out
+       */
       __cuda_callable__
       void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from the CPU, it transfers each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_addElement.out
+       * 
+       */
       __cuda_callable__
       void addElement( IndexType row,
                        IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on a GPU device
+       * and this method is called from the CPU, it transfers each matrix element separately and so the
+       * performance is very low. For higher performance see \ref SparseMatrix::getRow
+       * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_getElement.out
+       * 
+       */
       __cuda_callable__
       RealType getElement( IndexType row,
                            IndexType column ) const;
 
-      /*template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;*/
-
-      /***
-       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity (neutral) element of the given reduction operation.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_rowsReduction.out
        */
-      template< typename InVector,
-                typename OutVector >
-      void vectorProduct( const InVector& inVector,
-                          OutVector& outVector,
-                          const RealType matrixMultiplicator = 1.0,
-                          const RealType outVectorMultiplicator = 0.0,
-                          const IndexType firstRow = 0,
-                          IndexType lastRow = 0 ) const;
-
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity (neutral) element of the given reduction operation.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity (neutral) element of the given reduction operation.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity (neutral) element of the given reduction operation.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_allRowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for iteration over matrix rows in the range [begin,end) for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
-
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
+
+      /**
+       * \brief Method for iteration over matrix rows in the range [begin,end) for non-constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
       template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
-
+      void forRows( IndexType begin, IndexType end, Function& function );
+
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       * 
+       * See \ref SparseMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref SparseMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function );
 
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. It is the number of matrix rows by default.
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0,
+                          const IndexType begin = 0,
+                          IndexType end = 0 ) const;
+
       template< typename Vector1, typename Vector2 >
       bool performSORIteration( const Vector1& b,
                                 const IndexType row,
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 1aa4289dc778501f0f3cc52759e8bed1db82d4b7..5475faa56aed9c2c50128e8f0fdd3da4e1c94187 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -92,7 +92,8 @@ getSerializationType()
    return String( "Matrices::SparseMatrix< " ) +
              TNL::getSerializationType< RealType >() + ", " +
              TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " +
-             TNL::getSerializationType< IndexType >() + ", [any_allocator] >";
+             TNL::getSerializationType< IndexType >() + ", " +
+             MatrixType::getSerializationType() + ", [any_allocator], [any_allocator] >";
 }
 
 template< typename Real,
@@ -120,7 +121,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    details::set_size_if_resizable( rowLengths, this->getRows() );
    rowLengths = 0;
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
@@ -149,7 +150,7 @@ template< typename Real,
           template< typename, typename > class SegmentsView >
 Index
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
    const auto columns_view = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
@@ -174,9 +175,6 @@ getNumberOfNonzeroMatrixElements() const
             return 0.0;
          return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements
       };
-      //auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) {
-      //   sum += value;
-      //};
       auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable {
          row_sums_view[ row ] = value;
       };
@@ -347,22 +345,6 @@ getElement( IndexType row,
    return 0.0;
 }
 
-/*template< typename Real,
-          typename Device,
-          typename Index,
-          typename MatrixType,
-          template< typename, typename > class SegmentsView >
-   template< typename Vector >
-__cuda_callable__
-typename Vector::RealType
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-rowVectorProduct( const IndexType row,
-                  const Vector& vector ) const
-{
-   TNL_ASSERT_TRUE( false, "TODO: rowVectorProduct is not implemented yet.");
-   return 0;
-}*/
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -442,23 +424,63 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+{
+   auto columns_view = this->columnIndexes.getView();
+   auto values_view = this->values.getView();
+   const IndexType paddingIndex_ = this->getPaddingIndex();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+      IndexType& columnIdx = columns_view[ globalIdx ];
+      if( columnIdx != paddingIndex_ )
+      {
+         if( isBinary() )
+            return fetch( rowIdx, columnIdx, 1 );
+         else
+            return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      }
+      return zero;
+   };
+   this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
       {
          if( isBinary() )
-            return fetch( rowIdx, columnIdx, globalIdx, 1 );
+            return fetch( rowIdx, columnIdx, 1 );
          else
-            return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
+            return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
       }
       return zero;
    };
-   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+   this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
+allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -482,7 +504,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-forRows( IndexType first, IndexType last, Function& function ) const
+forRows( IndexType begin, IndexType end, Function& function ) const
 {
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
@@ -494,7 +516,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
          function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
       return true;
    };
-   this->segments.forSegments( first, last, f );
+   this->segments.forSegments( begin, end, f );
 }
 
 template< typename Real,
@@ -505,7 +527,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >::
-forRows( IndexType first, IndexType last, Function& function )
+forRows( IndexType begin, IndexType end, Function& function )
 {
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
@@ -519,7 +541,7 @@ forRows( IndexType first, IndexType last, Function& function )
       else
          function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute );
    };
-   this->segments.forSegments( first, last, f );
+   this->segments.forSegments( begin, end, f );
 }
 
 template< typename Real,
@@ -709,7 +731,12 @@ print( std::ostream& str ) const
                value = ( RealType ) 1.0;
             else
                value = this->values.getElement( globalIdx );
-            str << " Col:" << column << "->" << value << "\t";
+            if( value )
+            {
+               std::stringstream str_;
+               str_ << std::setw( 4 ) << std::right << column << ":" << std::setw( 4 ) << std::left << value;
+               str << std::setw( 10 ) << str_.str();
+            }
          }
          str << std::endl;
       }
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
deleted file mode 100644
index 25472aa3c9bedd726683e23e8237a27cd1385494..0000000000000000000000000000000000000000
--- a/src/TNL/Matrices/Tridiagonal.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/***************************************************************************
-                          Tridiagonal.h  -  description
-                             -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-#include <TNL/Matrices/Matrix.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalMatrixRowView.h>
-#include <TNL/Containers/Segments/Ellpack.h>
-#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
-#include <TNL/Matrices/TridiagonalMatrixView.h>
-
-namespace TNL {
-namespace Matrices {
-
-template< typename Real = double,
-          typename Device = Devices::Host,
-          typename Index = int,
-          ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(),
-          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
-class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
-{
-   public:
-      using RealType = Real;
-      using DeviceType = Device;
-      using IndexType = Index;
-      using RealAllocatorType = RealAllocator;
-      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
-      using ValuesViewType = typename ValuesVectorType::ViewType;
-      using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >;
-      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
-      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
-
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-
-      template< typename _Real = Real,
-                typename _Device = Device,
-                typename _Index = Index >
-      using Self = Tridiagonal< _Real, _Device, _Index >;
-
-      static constexpr ElementsOrganization getOrganization() { return Organization; };
-
-      Tridiagonal();
-
-      Tridiagonal( const IndexType rows, const IndexType columns );
-
-      ViewType getView() const; // TODO: remove const
-
-      //ConstViewType getConstView() const;
-
-      static String getSerializationType();
-
-      virtual String getSerializationTypeVirtual() const;
-
-      void setDimensions( const IndexType rows,
-                          const IndexType columns );
-
-      //template< typename Vector >
-      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
-
-      template< typename Vector >
-      void getCompressedRowLengths( Vector& rowLengths ) const;
-
-      [[deprecated]]
-      IndexType getRowLength( const IndexType row ) const;
-
-      IndexType getMaxRowLength() const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      void setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m );
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
-      void reset();
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      bool operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      bool operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
-
-      __cuda_callable__
-      RowView getRow( const IndexType& rowIdx );
-
-      __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
-
-      void setValue( const RealType& v );
-
-      void setElement( const IndexType row,
-                       const IndexType column,
-                       const RealType& value );
-
-      void addElement( const IndexType row,
-                       const IndexType column,
-                       const RealType& value,
-                       const RealType& thisElementMultiplicator = 1.0 );
-
-      RealType getElement( const IndexType row,
-                           const IndexType column ) const;
-
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function ) const;
-
-      template< typename Function >
-      void forRows( IndexType first, IndexType last, Function& function );
-
-      template< typename Function >
-      void forAllRows( Function& function ) const;
-
-      template< typename Function >
-      void forAllRows( Function& function );
-
-      template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;
-
-      template< typename InVector,
-                typename OutVector >
-      void vectorProduct( const InVector& inVector,
-                          OutVector& outVector ) const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      void addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
-                      const RealType& matrixMultiplicator = 1.0,
-                      const RealType& thisMatrixMultiplicator = 1.0 );
-
-      template< typename Real2, typename Index2 >
-      void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                             const RealType& matrixMultiplicator = 1.0 );
-
-      template< typename Vector1, typename Vector2 >
-      __cuda_callable__
-      void performSORIteration( const Vector1& b,
-                                const IndexType row,
-                                Vector2& x,
-                                const RealType& omega = 1.0 ) const;
-
-      // copy assignment
-      Tridiagonal& operator=( const Tridiagonal& matrix );
-
-      // cross-device copy assignment
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-      Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix );
-
-      void save( File& file ) const;
-
-      void load( File& file );
-
-      void save( const String& fileName ) const;
-
-      void load( const String& fileName );
-
-      void print( std::ostream& str ) const;
-
-      const IndexerType& getIndexer() const;
-
-      IndexerType& getIndexer();
-
-      __cuda_callable__
-      IndexType getPaddingIndex() const;
-
-   protected:
-
-      __cuda_callable__
-      IndexType getElementIndex( const IndexType row,
-                                 const IndexType localIdx ) const;
-
-      IndexerType indexer;
-
-      ViewType view;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/Tridiagonal.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrix.h b/src/TNL/Matrices/TridiagonalMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..afd1392387eaf84fa901877e1a41f07bbdc6aa77
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrix.h
@@ -0,0 +1,811 @@
+/***************************************************************************
+                          TridiagonalMatrix.h  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief Implementation of sparse tridiagonal matrix.
+ *
+ * Use this matrix type for storing tridiagonal matrices, i.e., matrices having
+ * non-zero matrix elements only on the diagonal and immediately above and below the diagonal.
+ * This is an example:
+ * \f[
+ * \left(
+ * \begin{array}{ccccccc}
+ *  4  & -1  &  .  & .   &  . & .  \\
+ * -1  &  4  & -1  &  .  &  . & .  \\
+ *  .  & -1  &  4  & -1  &  . & .  \\
+ *  .  &  .  & -1  &  4  & -1 &  . \\
+ *  .  &  .  &  .  & -1  &  4 & -1 \\
+ *  .  &  .  &  .  &  .  & -1 &  4
+ * \end{array}
+ * \right)
+ * \f]
+ *
+ * The advantage is that the column indexes are not stored
+ * explicitly as they are in \ref SparseMatrix. This can significantly reduce the
+ * memory requirements, which also means better performance. See the following table
+ * for the storage requirements comparison between \ref TridiagonalMatrix and \ref SparseMatrix.
+ *
+ *  Data types         |      SparseMatrix    | TridiagonalMatrix | Ratio
+ * --------------------|----------------------|---------------------|--------
+ *  float + 32-bit int | 8 bytes per element  | 4 bytes per element | 50%
+ *  double + 32-bit int| 12 bytes per element | 8 bytes per element | 67%
+ *  float + 64-bit int | 12 bytes per element | 4 bytes per element | 33%
+ *  double + 64-bit int| 16 bytes per element | 8 bytes per element | 50%
+ *
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder
+ *         or ColumnMajorOrder.
+ * \tparam RealAllocator is allocator for the matrix elements.
+ */
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int,
+          ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(),
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
+class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
+{
+   public:
+
+      // Supporting types - they are not important for the user
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using IndexerType = details::TridiagonalMatrixIndexer< Index, Organization >;
+      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesViewType = typename ValuesVectorType::ViewType;
+
+      /**
+       * \brief The type of matrix elements.
+       */
+      using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
+      using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief The allocator for matrix elements values.
+       */
+      using RealAllocatorType = RealAllocator;
+
+      /**
+       * \brief Type of related matrix view.
+       *
+       * See \ref TridiagonalMatrixView.
+       */
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       *
+       * See \ref TridiagonalMatrixView.
+       */
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
+
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
+
+
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = TridiagonalMatrix< _Real, _Device, _Index >;
+
+      static constexpr ElementsOrganization getOrganization() { return Organization; };
+
+      /**
+       * \brief Constructor with no parameters.
+       */
+      TridiagonalMatrix();
+
+      /**
+       * \brief Constructor with matrix dimensions.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       */
+      TridiagonalMatrix( const IndexType rows, const IndexType columns );
+
+      /**
+       * \brief Constructor with number of matrix columns and matrix elements.
+       *
+       * The number of matrix rows is deduced from the size of the initializer list \e data.
+       *
+       * \tparam ListReal is type used in the initializer list defining matrix elements values.
+       *
+       * \param columns is number of matrix columns.
+       * \param data is initializer list holding matrix elements. The size of the outer list
+       *    defines the number of matrix rows. Each inner list defines values of each sub-diagonal
+       *    and so its size should be lower or equal to three. Values
+       *    of sub-diagonals which do not fit to given row are omitted.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_Constructor_init_list_1.out
+       */
+      template< typename ListReal >
+      TridiagonalMatrix( const IndexType columns,
+                         const std::initializer_list< std::initializer_list< ListReal > >& data );
+
+      /**
+       * \brief Copy constructor.
+       *
+       * \param matrix is an input matrix.
+       */
+      TridiagonalMatrix( const TridiagonalMatrix& matrix ) = default;
+
+      /**
+       * \brief Move constructor.
+       *
+       * \param matrix is an input matrix.
+       */
+      TridiagonalMatrix( TridiagonalMatrix&& matrix ) = default;
+
+      /**
+       * \brief Returns a modifiable view of the tridiagonal matrix.
+       *
+       * See \ref TridiagonalMatrixView.
+       *
+       * \return tridiagonal matrix view.
+       */
+      ViewType getView() const; // TODO: remove const
+
+      /**
+       * \brief Returns a non-modifiable view of the tridiagonal matrix.
+       *
+       * See \ref TridiagonalMatrixView.
+       *
+       * \return tridiagonal matrix view.
+       */
+      ConstViewType getConstView() const;
+
+      /**
+       * \brief Returns string with serialization type.
+       *
+       * The string has a form `Matrices::TridiagonalMatrix< RealType,  [any_device], IndexType, ElementsOrganization, [any_allocator] >`.
+       *
+       * \return \ref String with the serialization type.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getSerializationType.out
+       */
+      static String getSerializationType();
+
+      /**
+       * \brief Returns string with serialization type.
+       *
+       * See \ref TridiagonalMatrix::getSerializationType.
+       *
+       * \return \e String with the serialization type.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getSerializationType.out
+       */
+      virtual String getSerializationTypeVirtual() const;
+
+      /**
+       * \brief Set matrix dimensions.
+       *
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       */
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
+
+      /**
+       * \brief This method is for compatibility with \ref SparseMatrix.
+       *
+       * It checks if the number of matrix diagonals is compatible with
+       * required number of non-zero matrix elements in each row. If not,
+       * an exception is thrown.
+       *
+       * \tparam RowCapacitiesVector is vector-like container type for holding required
+       *    row capacities.
+       *
+       * \param rowCapacities is vector-like container holding required row capacities.
+       */
+      template< typename RowCapacitiesVector >
+      void setRowCapacities( const RowCapacitiesVector& rowCapacities );
+
+      /**
+       * \brief Set matrix elements from an initializer list.
+       *
+       * \tparam ListReal is data type of the initializer list.
+       *
+       * \param data is initializer list holding matrix elements. The size of the outer list
+       *    defines the number of matrix rows. Each inner list defines values of each sub-diagonal
+       *    and so its size should be lower or equal to three. Values
+       *    of sub-diagonals which do not fit to given row are omitted.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_setElements.out
+       */
+      template< typename ListReal >
+      void setElements( const std::initializer_list< std::initializer_list< ListReal > >& data );
+
+      /**
+       * \brief Computes number of non-zeros in each row.
+       *
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getCompressedRowLengths.out
+       */
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      //[[deprecated]]
+      //IndexType getRowLength( const IndexType row ) const;
+
+      //IndexType getMaxRowLength() const;
+
+      /**
+       * \brief Set up the matrix dimensions based on another tridiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       *
+       * \param matrix is the source matrix.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_ >
+      void setLike( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix );
+
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       *
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for a matrix having all allocated elements set to zero.
+       *
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Resets the matrix to zero dimensions.
+       */
+      void reset();
+
+      /**
+       * \brief Comparison operator with another tridiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       *
+       * \return \e true if both matrices are identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_ >
+      bool operator == ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
+
+      /**
+       * \brief Comparison operator with another tridiagonal matrix.
+       *
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * \tparam RealAllocator_ is \e RealAllocator of the source matrix.
+       *
+       * \param matrix is the source matrix.
+       *
+       * \return \e true if both matrices are NOT identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_ >
+      bool operator != ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const;
+
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       *
+       * \param rowIdx is matrix row index.
+       *
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getRow.out
+       *
+       * See \ref TridiagonalMatrixRowView.
+       */
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
+
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       *
+       * \param rowIdx is matrix row index.
+       *
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getConstRow.out
+       *
+       * See \ref TridiagonalMatrixRowView.
+       */
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
+
+      /**
+       * \brief Set all matrix elements to given value.
+       *
+       * \param value is the new value of all matrix elements.
+       */
+      void setValue( const RealType& value );
+
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on the GPU device and
+       * this method is called from the CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       *
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_setElement.out
+       */
+      void setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
+
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on the GPU device and
+       * this method is called from the CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       *
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementTriplicator is the multiplicator by which the original matrix
+       *   element value is multiplied before addition of given \e value.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_addElement.out
+       *
+       */
+      void addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementTriplicator = 1.0 );
+
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       *
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       *
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       *
+       * \return value of given matrix element.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_getElement.out
+       */
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue above).
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation, i.e. its identity (neutral) element.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows of constant matrix instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue above).
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation, i.e. its identity (neutral) element.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue above).
+       *
+       * The reduction is performed on all rows of the matrix, i.e. on the range
+       * [0, getRows()) of rows.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation, i.e. its identity (neutral) element.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows of constant matrix instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue above).
+       *
+       * The reduction is performed on all rows of the matrix, i.e. on the range
+       * [0, getRows()) of rows.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation, i.e. its identity (neutral) element.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for iteration over matrix rows for constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function ) const;
+
+      /**
+       * \brief Method for iteration over matrix rows for non-constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function& function );
+
+      /**
+       * \brief Method for iteration over all matrix rows for constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * This method takes no row range; the function is called for all rows
+       * of the matrix.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      /**
+       * \brief Method for iteration over all matrix rows for non-constant instances.
+       *
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
+       *
+       * This method takes no row range; the function is called for all rows
+       * of the matrix.
+       * \param function is an instance of the lambda function to be called in each row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forAllRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      /*template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;*/
+
+      /**
+       * \brief Computes product of matrix and vector.
+       *
+       * More precisely, it computes:
+       *
+       * `outVector = matrixTriplicator * ( * this ) * inVector + outVectorTriplicator * outVector`
+       *
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       *
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixTriplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorTriplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. It is the number of matrix rows by default.
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType matrixTriplicator = 1.0,
+                          const RealType outVectorTriplicator = 0.0,
+                          const IndexType begin = 0,
+                          IndexType end = 0 ) const;
+
+      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
+      void addMatrix( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
+                      const RealType& matrixTriplicator = 1.0,
+                      const RealType& thisMatrixTriplicator = 1.0 );
+
+      template< typename Real2, typename Index2 >
+      void getTransposition( const TridiagonalMatrix< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixTriplicator = 1.0 );
+
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       *
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
+       */
+      TridiagonalMatrix& operator=( const TridiagonalMatrix& matrix );
+
+      /**
+       * \brief Assignment of another tridiagonal matrix.
+       *
+       * \param matrix is input matrix for the assignment.
+       * \return reference to this matrix.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_,
+                typename RealAllocator_ >
+      TridiagonalMatrix& operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix );
+
+      /**
+       * \brief Method for saving the matrix to a file.
+       *
+       * \param file is the output file.
+       */
+      void save( File& file ) const;
+
+      /**
+       * \brief Method for loading the matrix from a file.
+       *
+       * \param file is the input file.
+       */
+      void load( File& file );
+
+      /**
+       * \brief Method for saving the matrix to the file with given filename.
+       *
+       * \param fileName is name of the file.
+       */
+      void save( const String& fileName ) const;
+
+      /**
+       * \brief Method for loading the matrix from the file with given filename.
+       *
+       * \param fileName is name of the file.
+       */
+      void load( const String& fileName );
+
+      /**
+       * \brief Method for printing the matrix to output stream.
+       *
+       * \param str is the output stream.
+       */
+      void print( std::ostream& str ) const;
+
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       *
+       * \return constant reference to the indexer.
+       */
+      const IndexerType& getIndexer() const;
+
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       *
+       * \return non-constant reference to the indexer.
+       */
+      IndexerType& getIndexer();
+
+      /**
+       * \brief Returns padding index denoting padding zero elements.
+       *
+       * These elements are used for efficient data alignment in memory.
+       *
+       * \return value of the padding index.
+       */
+      __cuda_callable__
+      IndexType getPaddingIndex() const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
+
+      IndexerType indexer;
+
+      ViewType view;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/TridiagonalMatrix.hpp>
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/TridiagonalMatrix.hpp
similarity index 65%
rename from src/TNL/Matrices/Tridiagonal.hpp
rename to src/TNL/Matrices/TridiagonalMatrix.hpp
index 8a1804e9d36cff5052a15f7320f1f607927378e6..1d522e40d885b61a43f249595580b48773a7e254 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrix.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Tridiagonal.hpp  -  description
+                          TridiagonalMatrix.hpp  -  description
                              -------------------
     begin                : Nov 30, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -12,22 +12,19 @@
 
 #include <sstream>
 #include <TNL/Assert.h>
-#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
 namespace Matrices {
 
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
 template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-Tridiagonal()
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix()
 {
 }
 
@@ -36,8 +33,8 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-Tridiagonal( const IndexType rows, const IndexType columns )
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix( const IndexType rows, const IndexType columns )
 {
    this->setDimensions( rows, columns );
 }
@@ -47,25 +44,27 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getView() const -> ViewType
+   template< typename ListReal >
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix( const IndexType columns,
+                   const std::initializer_list< std::initializer_list< ListReal > >& data )
 {
-   // TODO: fix when getConstView works
-   return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer );
+   this->setDimensions( data.size(), columns );
+   this->setElements( data );
 }
 
-/*template< typename Real,
+template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
 auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getConstView() const -> ConstViewType
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+getView() const -> ViewType
 {
-   return ConstViewType( this->values.getConstView(), indexer );
-}*/
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< TridiagonalMatrix* >( this )->values.getView(), indexer );
+}
 
 template< typename Real,
           typename Device,
@@ -73,10 +72,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 String
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getSerializationType()
 {
-   return String( "Matrices::Tridiagonal< " ) +
+   return String( "Matrices::TridiagonalMatrix< " ) +
           TNL::getSerializationType< RealType >() + ", [any_device], " +
           TNL::getSerializationType< IndexType >() + ", " +
           ( Organization ? "true" : "false" ) + ", [any_allocator] >";
@@ -88,7 +87,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 String
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getSerializationTypeVirtual() const
 {
    return this->getSerializationType();
@@ -100,7 +99,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 setDimensions( const IndexType rows, const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
@@ -115,24 +114,24 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
- //  template< typename Vector >
+   template< typename RowCapacitiesVector >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+setRowCapacities( const RowCapacitiesVector& rowCapacities )
 {
-   if( max( rowLengths ) > 3 )
+   if( max( rowCapacities ) > 3 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( rowLengths.getElement( 0 ) > 2 )
+   if( rowCapacities.getElement( 0 ) > 2 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( this->getRows() > this->getColumns() )
-      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
+      if( rowCapacities.getElement( this->getRows()-1 ) > 1 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() == this->getColumns() )
-      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
+      if( rowCapacities.getElement( this->getRows()-1 ) > 2 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() < this->getColumns() )
-      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
+      if( rowCapacities.getElement( this->getRows()-1 ) > 3 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
 }
 
@@ -141,36 +140,46 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-   template< typename Vector >
+   template< typename ListReal >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getCompressedRowLengths( Vector& rowLengths ) const
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+setElements( const std::initializer_list< std::initializer_list< ListReal > >& data )
 {
-   return this->view.getCompressedRowLengths( rowLengths );
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      this->getValues() = 0.0;
+      auto row_it = data.begin();
+      for( size_t rowIdx = 0; rowIdx < data.size(); rowIdx++ )
+      {
+         auto data_it = row_it->begin();
+         IndexType i = 0;
+         while( data_it != row_it->end() )
+            this->getRow( rowIdx ).setElement( i++, *data_it++ );
+         row_it ++;
+      }
+   }
+   else
+   {
+      TridiagonalMatrix< Real, Devices::Host, Index, Organization > hostMatrix(
+         this->getRows(),
+         this->getColumns() );
+      hostMatrix.setElements( data );
+      *this = hostMatrix;
+   }
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          typename RealAllocator >
-Index
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getRowLength( const IndexType row ) const
-{
-   return this->view.getRowLength( row );
-}
 
 template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-Index
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getMaxRowLength() const
+   template< typename Vector >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
 {
-   return this->view.getMaxRowLength();
+   return this->view.getCompressedRowLengths( rowLengths );
 }
 
 template< typename Real,
@@ -180,8 +189,8 @@ template< typename Real,
           typename RealAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m )
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+setLike( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& m )
 {
    this->setDimensions( m.getRows(), m.getColumns() );
 }
@@ -192,10 +201,10 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 Index
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-getNumberOfNonzeroMatrixElements() const
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+getNonzeroElementsCount() const
 {
-   return this->view.getNumberOfNonzeroMatrixElements();
+   return this->view.getNonzeroElementsCount();
 }
 
 template< typename Real,
@@ -204,7 +213,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 reset()
 {
    Matrix< Real, Device, Index >::reset();
@@ -217,14 +226,14 @@ template< typename Real,
           typename RealAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
 bool
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+operator == ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
 {
    if( Organization == Organization_ )
       return this->values == matrix.values;
    else
    {
-      TNL_ASSERT( false, "TODO" );
+      TNL_ASSERT_TRUE( false, "TODO" );
    }
 }
 
@@ -235,8 +244,8 @@ template< typename Real,
           typename RealAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
 bool
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+operator != ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const
 {
    return ! this->operator==( matrix );
 }
@@ -247,7 +256,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 setValue( const RealType& v )
 {
    this->view.setValue( v );
@@ -260,7 +269,7 @@ template< typename Real,
           typename RealAllocator >
 __cuda_callable__
 auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
    return this->view.getRow( rowIdx );
@@ -273,7 +282,7 @@ template< typename Real,
           typename RealAllocator >
 __cuda_callable__
 auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    return this->view.getRow( rowIdx );
@@ -285,7 +294,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
    this->view.setElement( row, column, value );
@@ -297,7 +306,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 addElement( const IndexType row,
             const IndexType column,
             const RealType& value,
@@ -312,7 +321,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 Real
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
    return this->view.getElement( row, column );
@@ -325,7 +334,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
@@ -338,12 +347,38 @@ template< typename Real,
           typename RealAllocator >
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -351,7 +386,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename Function >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    this->view.forRows( first, last, function );
@@ -364,7 +399,7 @@ template< typename Real,
           typename RealAllocator >
   template< typename Function >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    this->view.forRows( first, last, function );
@@ -377,7 +412,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename Function >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 forAllRows( Function& function ) const
 {
    this->view.forRows( 0, this->getRows(), function );
@@ -390,7 +425,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename Function >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 forAllRows( Function& function )
 {
    this->view.forRows( 0, this->getRows(), function );
@@ -401,27 +436,17 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-template< typename Vector >
-__cuda_callable__
-typename Vector::RealType
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-rowVectorProduct( const IndexType row, const Vector& vector ) const
-{
-   return this->view.rowVectorProduct();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization,
-          typename RealAllocator >
-   template< typename InVector,
-             typename OutVector >
+   template< typename InVector, typename OutVector >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-vectorProduct( const InVector& inVector, OutVector& outVector ) const
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator,
+               const IndexType begin,
+               IndexType end ) const
 {
-   this->view.vectorProduct( inVector, outVector );
+   this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, begin, end );
 }
 
 template< typename Real,
@@ -431,8 +456,8 @@ template< typename Real,
           typename RealAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+addMatrix( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix,
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
@@ -444,8 +469,8 @@ template< typename Real,
           typename Real2,
           typename Index,
           typename Index2 >
-__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
-                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
+__global__ void TridiagonalMatrixTranspositionCudaKernel( const TridiagonalMatrix< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             TridiagonalMatrix< Real, Devices::Cuda, Index >* outMatrix,
                                                              const Real matrixMultiplicator,
                                                              const Index gridIdx )
 {
@@ -473,7 +498,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
    template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::getTransposition( const TridiagonalMatrix< Real2, Device, Index2 >& matrix,
                                                                     const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
@@ -493,8 +518,8 @@ void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTranspo
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
-      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
+      TridiagonalMatrix* kernel_this = Cuda::passToDevice( *this );
+      typedef  TridiagonalMatrix< Real2, Device, Index2 > InMatrixType;
       InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
       dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
       const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
@@ -503,7 +528,7 @@ void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTranspo
       {
          if( gridIdx == cudaGrids - 1 )
             cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+         TridiagonalMatrixTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                                     ( kernel_inMatrix,
                                                       kernel_this,
                                                       matrixMultiplicator,
@@ -523,7 +548,7 @@ template< typename Real,
           typename RealAllocator >
    template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b,
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b,
                                                               const IndexType row,
                                                               Vector2& x,
                                                               const RealType& omega ) const
@@ -543,8 +568,8 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >&
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::operator=( const Tridiagonal& matrix )
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >&
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::operator=( const TridiagonalMatrix& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -558,9 +583,9 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
    template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ >
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >&
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
-operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix )
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >&
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
@@ -575,17 +600,17 @@ operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocat
       if( std::is_same< Device, Device_ >::value )
       {
          const auto matrix_view = matrix.getView();
-         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
          this->forAllRows( f );
       }
       else
       {
-         Tridiagonal< Real, Device, Index, Organization_ > auxMatrix;
+         TridiagonalMatrix< Real, Device, Index, Organization_ > auxMatrix;
          auxMatrix = matrix;
          const auto matrix_view = auxMatrix.getView();
-         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
          this->forAllRows( f );
@@ -599,7 +624,7 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
 }
@@ -609,7 +634,7 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( File& file )
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
    this->indexer.setDimensions( this->getRows(), this->getColumns() );
@@ -621,7 +646,7 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::save( const String& fileName ) const
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
@@ -631,7 +656,7 @@ template< typename Real,
           typename Index,
           ElementsOrganization Organization,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName )
+void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName )
 {
    Object::load( fileName );
 }
@@ -642,7 +667,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 void
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 print( std::ostream& str ) const
 {
    this->view.print( str );
@@ -654,7 +679,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getIndexer() const -> const IndexerType&
 {
    return this->indexer;
@@ -666,7 +691,7 @@ template< typename Real,
           ElementsOrganization Organization,
           typename RealAllocator >
 auto
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getIndexer() -> IndexerType&
 {
    return this->indexer;
@@ -679,12 +704,10 @@ template< typename Real,
           typename RealAllocator >
 __cuda_callable__
 Index
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getElementIndex( const IndexType row, const IndexType column ) const
 {
-   IndexType localIdx = column - row;
-   if( row > 0 )
-      localIdx++;
+   IndexType localIdx = column - row + 1;
 
    TNL_ASSERT_GE( localIdx, 0, "" );
    TNL_ASSERT_LT( localIdx, 3, "" );
@@ -699,115 +722,11 @@ template< typename Real,
           typename RealAllocator >
 __cuda_callable__
 Index
-Tridiagonal< Real, Device, Index, Organization, RealAllocator >::
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
 getPaddingIndex() const
 {
    return this->view.getPaddingIndex();
 }
 
-/*
-template<>
-class TridiagonalDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return 2*row + column;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType  >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ 1 ];
-         Index i = 3 * row;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i - 1 ] +
-                   vector[ row ] * values[ i ];
-         return vector[ row - 1 ] * values[ i - 1 ] +
-                vector[ row ] * values[ i ] +
-                vector[ row + 1 ] * values[ i + 1 ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class TridiagonalDeviceDependentCode< Devices::Cuda >
-{
-   public:
-
-      typedef Devices::Cuda Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return ( column - row + 1 )*rows + row - 1;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ rows - 1 ];
-         Index i = row - 1;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i ] +
-                   vector[ row ] * values[ i + rows ];
-         return vector[ row - 1 ] * values[ i ] +
-                vector[ row ] * values[ i + rows ] +
-                vector[ row + 1 ] * values[ i + 2*rows ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
- */
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 9496136dcf465cbb2a68c8fb37d0508ad7e10aac..321b0fab76db88c3f404e8188de08225afcf2de6 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -19,6 +19,20 @@
 namespace TNL {
 namespace Matrices {
 
+/**
+ * \brief Implementation of sparse tridiagonal matrix view.
+ *
+ * It serves as an accessor to \ref TridiagonalMatrix for example when passing the
+ * matrix to lambda functions. Tridiagonal matrix view can be also created in CUDA kernels.
+ *
+ * See \ref TridiagonalMatrix for more details.
+ * 
+ * \tparam Real is a type of matrix elements.
+ * \tparam Device is a device where the matrix is allocated.
+ * \tparam Index is a type for indexing of the matrix elements.
+ * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder
+ *         or ColumnMajorOrder.
+ */
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -26,102 +40,510 @@ template< typename Real = double,
 class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 {
    public:
+
+
+      // Supporting types - they are not important for the user
+      using BaseType = MatrixView< Real, Device, Index >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using IndexerType = details::TridiagonalMatrixIndexer< Index, Organization >;
+
+      /**
+       * \brief The type of matrix elements.
+       */
       using RealType = Real;
+
+      /**
+       * \brief The device where the matrix is allocated.
+       */
       using DeviceType = Device;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
       using IndexType = Index;
-      using BaseType = MatrixView< Real, Device, Index >;
-      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >;
-      using ValuesViewType = typename BaseType::ValuesView;
+
+      /**
+       * \brief Type of related matrix view. 
+       */
       using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >;
+
+      /**
+       * \brief Matrix view type for constant instances.
+       */
       using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
-      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
-      // TODO: remove this - it is here only for compatibility with original matrix implementation
-      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+      /**
+       * \brief Type for accessing matrix rows.
+       */
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
+      /**
+       * \brief Helper type for getting self type or its modifications.
+       */
       template< typename _Real = Real,
                 typename _Device = Device,
                 typename _Index = Index,
                 ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() >
       using Self = TridiagonalMatrixView< _Real, _Device, _Index, Organization_ >;
 
+      /**
+       * \brief Constructor with no parameters.
+       */
+      __cuda_callable__
       TridiagonalMatrixView();
 
+      /**
+       * \brief Constructor with all necessary data and views.
+       * 
+       * \param values is a vector view with matrix elements values
+       * \param indexer is an indexer of matrix elements
+       */
+      __cuda_callable__
       TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer );
 
+      /**
+       * \brief Copy constructor.
+       * 
+       * \param view is an input tridiagonal matrix view.
+       */
+      __cuda_callable__
+      TridiagonalMatrixView( const TridiagonalMatrixView& view ) = default;
+
+      /**
+       * \brief Move constructor.
+       * 
+       * \param view is an input tridiagonal matrix view.
+       */
+      __cuda_callable__
+      TridiagonalMatrixView( TridiagonalMatrixView&& view ) = default;
+
+      /**
+       * \brief Returns a modifiable view of the tridiagonal matrix.
+       * 
+       * \return tridiagonal matrix view.
+       */
       ViewType getView();
 
+      /**
+       * \brief Returns a non-modifiable view of the tridiagonal matrix.
+       * 
+       * \return tridiagonal matrix view.
+       */
       ConstViewType getConstView() const;
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * The string has a form `Matrices::TridiagonalMatrix< RealType,  [any_device], IndexType, Organization, [any_allocator] >`.
+       * 
+       * See \ref TridiagonalMatrix::getSerializationType.
+       * 
+       * \return \ref String with the serialization type.
+       */
       static String getSerializationType();
 
+      /**
+       * \brief Returns string with serialization type.
+       * 
+       * See \ref TridiagonalMatrix::getSerializationType.
+       * 
+       * \return \ref String with the serialization type.
+       */
       virtual String getSerializationTypeVirtual() const;
 
+      /**
+       * \brief Computes number of non-zeros in each row.
+       * 
+       * \param rowLengths is a vector into which the number of non-zeros in each row
+       * will be stored.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_getCompressedRowLengths.out
+       */
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
 
-      [[deprecated]]
-      IndexType getRowLength( const IndexType row ) const;
-
-      IndexType getMaxRowLength() const;
-
-      IndexType getNumberOfNonzeroMatrixElements() const;
-
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
+      //[[deprecated]]
+      //IndexType getRowLength( const IndexType row ) const;
+
+      //IndexType getMaxRowLength() const;
+
+      /**
+       * \brief Returns number of non-zero matrix elements.
+       *
+       * This method really counts the non-zero matrix elements and so
+       * it returns zero for matrix having all allocated elements set to zero.
+       *
+       * \return number of non-zero matrix elements.
+       */
+      IndexType getNonzeroElementsCount() const;
+
+      /**
+       * \brief Comparison operator with another tridiagonal matrix.
+       * 
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * 
+       * \return \e true if both matrices are identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_ >
       bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const;
 
-      template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
+      /**
+       * \brief Comparison operator with another tridiagonal matrix.
+       * 
+       * \tparam Real_ is \e Real type of the source matrix.
+       * \tparam Device_ is \e Device type of the source matrix.
+       * \tparam Index_ is \e Index type of the source matrix.
+       * \tparam Organization_ is \e Organization of the source matrix.
+       * 
+       * \param matrix is the source matrix.
+       * 
+       * \return \e true if both matrices are NOT identical and \e false otherwise.
+       */
+      template< typename Real_,
+                typename Device_,
+                typename Index_,
+                ElementsOrganization Organization_ >
       bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const;
 
+      /**
+       * \brief Non-constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_getRow.out
+       * 
+       * See \ref TridiagonalMatrixRowView.
+       */
       __cuda_callable__
       RowView getRow( const IndexType& rowIdx );
 
+      /**
+       * \brief Constant getter of simple structure for accessing given matrix row.
+       * 
+       * \param rowIdx is matrix row index.
+       * 
+       * \return RowView for accessing given matrix row.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_getConstRow.out
+       * 
+       * See \ref TridiagonalMatrixRowView.
+       */
       __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;
 
+      /**
+       * \brief Set all matrix elements to given value.
+       * 
+       * \param v is the new value of all matrix elements.
+       */
       void setValue( const RealType& v );
 
+      /**
+       * \brief Sets element at given \e row and \e column to given \e value.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on the GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value the element will be set to.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_setElement.out
+       */
+      __cuda_callable__
       void setElement( const IndexType row,
                        const IndexType column,
                        const RealType& value );
 
+      /**
+       * \brief Adds given \e value to the element at given \e row and \e column.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on the GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       * The call may fail if the matrix row capacity is exhausted.
+       * 
+       * \param row is row index of the element.
+       * \param column is column index of the element.
+       * \param value is the value to be added to the element.
+       * \param thisElementMultiplicator is multiplicator the original matrix element
+       *   value is multiplied by before addition of given \e value.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_addElement.out
+       * 
+       */
+      __cuda_callable__
       void addElement( const IndexType row,
                        const IndexType column,
                        const RealType& value,
                        const RealType& thisElementMultiplicator = 1.0 );
 
+      /**
+       * \brief Returns value of matrix element at position given by its row and column index.
+       * 
+       * This method can be called from the host system (CPU) no matter
+       * where the matrix is allocated. If the matrix is allocated on GPU this method
+       * can be called even from device kernels. If the matrix is allocated on the GPU device
+       * and this method is called from CPU, it transfers values of each matrix element separately and so the
+       * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow
+       * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows.
+       * 
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
+       * 
+       * \return value of given matrix element.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_getElement.out
+       * 
+       */
+      __cuda_callable__
       RealType getElement( const IndexType row,
                            const IndexType column ) const;
 
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type of the \e zero value; it initializes the reduction result of type FetchValue returned by the Fetch lambda function.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity element of the given reduction operation (e.g. 0 for a sum).
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_rowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type of the \e zero value; it initializes the reduction result of type FetchValue returned by the Fetch lambda function.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity element of the given reduction operation (e.g. 0 for a sum).
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_rowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on all matrix rows for constant instances.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type of the \e zero value; it initializes the reduction result of type FetchValue returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity element of the given reduction operation (e.g. 0 for a sum).
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_allRowsReduction.out
+       */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
       void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
+      /**
+       * \brief Method for performing general reduction on all matrix rows.
+       * 
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is type of the \e zero value; it initializes the reduction result of type FetchValue returned by the Fetch lambda function.
+       * 
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is the identity element of the given reduction operation (e.g. 0 for a sum).
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_allRowsReduction.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for iteration over matrix rows in given range for constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function ) const;
 
+      /**
+       * \brief Method for iteration over matrix rows in given range for non-constant instances.
+       * 
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have form like
+       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, RealType& value, bool& compute )`.
+       *  The \e localIdx parameter is a rank of the non-zero element in given row. 
+       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  be interrupted.
+       * 
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param function is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
       template< typename Function >
       void forRows( IndexType first, IndexType last, Function& function );
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows (for constant instances).
+       * 
+       * See \ref TridiagonalMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function ) const;
 
+      /**
+       * \brief This method calls \e forRows for all matrix rows.
+       * 
+       * See \ref TridiagonalMatrix::forRows.
+       * 
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function  is an instance of the lambda function to be called in each row.
+       * 
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forAllRows.out
+       */
       template< typename Function >
       void forAllRows( Function& function );
 
-      template< typename Vector >
-      __cuda_callable__
-      typename Vector::RealType rowVectorProduct( const IndexType row,
-                                                  const Vector& vector ) const;
-
+      /**
+       * \brief Computes product of matrix and vector.
+       * 
+       * More precisely, it computes:
+       * 
+       * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector`
+       * 
+       * \tparam InVector is type of input vector.  It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * \tparam OutVector is type of output vector. It can be \ref Vector,
+       *     \ref VectorView, \ref Array, \ref ArrayView or similar container.
+       * 
+       * \param inVector is input vector.
+       * \param outVector is output vector.
+       * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default.
+       * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added
+       *    to the result of matrix-vector product. It is zero by default.
+       * \param begin is the beginning of the rows range for which the vector product
+       *    is computed. It is zero by default.
+       * \param end is the end of the rows range for which the vector product
+       *    is computed. Zero (the default) stands for the number of the matrix rows.
+       */
       template< typename InVector,
                 typename OutVector >
       void vectorProduct( const InVector& inVector,
-                          OutVector& outVector ) const;
+                          OutVector& outVector,
+                          const RealType matrixMultiplicator = 1.0,
+                          const RealType outVectorMultiplicator = 0.0,
+                          const IndexType begin = 0,
+                          IndexType end = 0 ) const;
 
       template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ >
       void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix,
@@ -139,18 +561,58 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
                                 Vector2& x,
                                 const RealType& omega = 1.0 ) const;
 
+      /**
+       * \brief Assignment of exactly the same matrix type.
+       * 
+       * \param view is input matrix view for the assignment.
+       * \return reference to this matrix.
+       */
+      TridiagonalMatrixView& operator=( const TridiagonalMatrixView& view );
+
+      /**
+       * \brief Method for saving the matrix to a file.
+       * 
+       * \param file is the output file.
+       */
       void save( File& file ) const;
 
+      /**
+       * \brief Method for saving the matrix to the file with given filename.
+       * 
+       * \param fileName is name of the file.
+       */
       void save( const String& fileName ) const;
 
+      /**
+       * \brief Method for printing the matrix to output stream.
+       * 
+       * \param str is the output stream.
+       */
       void print( std::ostream& str ) const;
 
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       * 
+       * \return constant reference to the indexer.
+       */
       __cuda_callable__
       const IndexerType& getIndexer() const;
 
+      /**
+       * \brief This method returns matrix elements indexer used by this matrix.
+       * 
+       * \return non-constant reference to the indexer.
+       */
       __cuda_callable__
       IndexerType& getIndexer();
 
+      /**
+       * \brief Returns padding index denoting padding zero elements.
+       * 
+       * These elements are used for efficient data alignment in memory.
+       * 
+       * \return value of the padding index.
+       */
       __cuda_callable__
       IndexType getPaddingIndex() const;
 
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 092e63cbc00b7bcea7d42ea1d2e0f84389dce8c1..d63e6dd50821370c7e4e41b4b3ede9c55602ad67 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -66,10 +66,10 @@ String
 TridiagonalMatrixView< Real, Device, Index, Organization >::
 getSerializationType()
 {
-   return String( "Matrices::Tridiagonal< " ) +
+   return String( "Matrices::TridiagonalMatrix< " ) +
           TNL::getSerializationType< RealType >() + ", [any_device], " +
           TNL::getSerializationType< IndexType >() + ", " +
-          ( Organization ? "true" : "false" ) + ", [any_allocator] >";
+          TNL::getSerializationType( Organization ) + ", [any_allocator] >";
 }
 
 template< typename Real,
@@ -107,36 +107,13 @@ getCompressedRowLengths( Vector& rowLengths ) const
    this->allRowsReduction( fetch, reduce, keep, 0 );
 }
 
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-TridiagonalMatrixView< Real, Device, Index, Organization >::
-getRowLength( const IndexType row ) const
-{
-   return this->indexer.getRowSize( row );
-}
-
 template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
 Index
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-getMaxRowLength() const
-{
-   return 3;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-Index
-TridiagonalMatrixView< Real, Device, Index, Organization >::
-getNumberOfNonzeroMatrixElements() const
+getNonzeroElementsCount() const
 {
    const auto values_view = this->values.getConstView();
    auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
@@ -158,7 +135,7 @@ operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_
       return this->values == matrix.values;
    else
    {
-      TNL_ASSERT( false, "TODO" );
+      TNL_ASSERT_TRUE( false, "TODO" );
    }
 }
 
@@ -213,7 +190,7 @@ template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
-void
+__cuda_callable__ void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
@@ -223,9 +200,13 @@ setElement( const IndexType row, const IndexType column, const RealType& value )
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
    {
+#ifdef __CUDA_ARCH__
+      TNL_ASSERT_TRUE( false, "Wrong matrix element coordinates tridiagonal matrix." );
+#else
       std::stringstream msg;
       msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
       throw std::logic_error( msg.str() );
+#endif
    }
    this->values.setElement( this->getElementIndex( row, column ), value );
 }
@@ -234,7 +215,7 @@ template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
-void
+__cuda_callable__ void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
 addElement( const IndexType row,
             const IndexType column,
@@ -247,9 +228,13 @@ addElement( const IndexType row,
    TNL_ASSERT_LT( column, this->getColumns(), "" );
    if( abs( row - column ) > 1 )
    {
+#ifdef __CUDA_ARCH__
+      TNL_ASSERT_TRUE( false, "Wrong matrix element coordinates tridiagonal matrix." );
+#else
       std::stringstream msg;
       msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
       throw std::logic_error( msg.str() );
+#endif
    }
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
@@ -259,7 +244,7 @@ template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
-Real
+__cuda_callable__ Real
 TridiagonalMatrixView< Real, Device, Index, Organization >::
 getElement( const IndexType row, const IndexType column ) const
 {
@@ -290,8 +275,52 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
       Real_ sum( zero );
       if( rowIdx == 0 )
       {
-         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
          reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         reduce( sum, fetch( 0, 2, values_view[ indexer.getGlobalIndex( 0, 2 ) ] ) );
+         keep( 0, sum );
+         return;
+      }
+      if( rowIdx + 1 < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
+         keep( rowIdx, sum );
+         return;
+      }
+      if( rowIdx < indexer.getColumns() )
+      {
+         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
+         keep( rowIdx, sum );
+      }
+      else
+      {
+         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
+{
+   using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
+   auto values_view = this->values.getConstView();
+   const auto indexer = this->indexer;
+   const auto zero = zero_;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      Real_ sum( zero );
+      if( rowIdx == 0 )
+      {
+         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
+         reduce( sum, fetch( 0, 2, values_view[ indexer.getGlobalIndex( 0, 2 ) ] ) );
          keep( 0, sum );
          return;
       }
@@ -329,6 +358,18 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer
    this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+{
+   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); // non-const overload: forwards to rowsReduction over rows [0, getNonemptyRowsCount())
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -344,9 +385,9 @@ forRows( IndexType first, IndexType last, Function& function ) const
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       if( rowIdx == 0 )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
-      } 
+         function( 0, 1, 0, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
+         function( 0, 2, 1, values_view[ indexer.getGlobalIndex( 0, 2 ) ], compute );
+      }
       else if( rowIdx + 1 < indexer.getColumns() )
       {
          function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
@@ -375,25 +416,26 @@ forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       if( rowIdx == 0 )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
-      } 
+         function( 0, 1, 0, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
+         function( 0, 2, 1, values_view[ indexer.getGlobalIndex( 0, 2 ) ], compute );
+      }
       else if( rowIdx + 1 < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
       }
       else if( rowIdx < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
       }
       else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -422,27 +464,20 @@ forAllRows( Function& function )
    this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          ElementsOrganization Organization >
-template< typename Vector >
-__cuda_callable__
-typename Vector::RealType 
-TridiagonalMatrixView< Real, Device, Index, Organization >::
-rowVectorProduct( const IndexType row, const Vector& vector ) const
-{
-}
-
 template< typename Real,
           typename Device,
           typename Index,
           ElementsOrganization Organization >
    template< typename InVector,
              typename OutVector >
-void 
+void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-vectorProduct( const InVector& inVector, OutVector& outVector ) const
+vectorProduct( const InVector& inVector,
+               OutVector& outVector,
+               const RealType matrixMultiplicator,
+               const RealType outVectorMultiplicator,
+               const IndexType begin,
+               IndexType end ) const
 {
    TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
    TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
@@ -455,10 +490,31 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
       sum += value;
    };
-   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+   auto keeper1 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       outVectorView[ row ] = value;
    };
-   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+   auto keeper2 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value;
+   };
+   if( end == 0 )
+      end = this->getRows();
+   if( matrixMultiplicator == 1.0 && outVectorMultiplicator == 0.0 )
+      this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
+   else
+      this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+TridiagonalMatrixView< Real, Device, Index, Organization >&
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+operator=( const TridiagonalMatrixView& view )
+{
+   MatrixView< Real, Device, Index >::operator=( view ); // let the MatrixView base class assign its own part first
+   this->indexer = view.indexer; // the indexer is a member of this class, so it is copied here
+   return *this; // returning *this allows chained assignment
 }
 
 template< typename Real,
@@ -487,13 +543,13 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >&
       const auto matrix_view = matrix;
       const auto matrixMult = matrixMultiplicator;
       const auto thisMult = thisMatrixMultiplicator;
-      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
          value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
       };
-      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
          value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
       };
-      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
          value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
       };
       if( thisMult == 0.0 )
@@ -678,9 +734,7 @@ Index
 TridiagonalMatrixView< Real, Device, Index, Organization >::
 getElementIndex( const IndexType row, const IndexType column ) const
 {
-   IndexType localIdx = column - row;
-   if( row > 0 )
-      localIdx++;
+   IndexType localIdx = column - row + 1;
 
    TNL_ASSERT_GE( localIdx, 0, "" );
    TNL_ASSERT_LT( localIdx, 3, "" );
diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
index 3597c30f7d3eec37ef85b050cb01963e5f34715a..bba7b524e980b9cbe414eddbd18b5e156c806477 100644
--- a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
@@ -21,6 +21,7 @@ class MultidiagonalMatrixIndexer
    public:
 
       using IndexType = Index;
+      using ConstType = MultidiagonalMatrixIndexer< std::add_const_t< Index >, RowMajorOrder >;
 
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
 
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
index d7a3a429dc00dd194aef5aab2f05f289b9c9215e..8412d3dad815da2672570b6705452b49a86ebc69 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
@@ -15,8 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/DenseMatrix.h>
-#include <TNL/Matrices/Tridiagonal.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -433,8 +433,8 @@ void tridiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
@@ -483,10 +483,10 @@ void multidiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
-   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
-   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+   using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType;
+   DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 };
 
    const IndexType rows( 10 ), columns( 10 );
    MultidiagonalHost hostMatrix( rows, columns, diagonals );
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
index 87d5e139bfc662cdd487e8fcfa2ce3c64a4b10c2..8d532ae7123b49f592b47a268ffc0bc3230580ab 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -948,7 +948,7 @@ void test_RowsReduction()
    // Compute number of non-zero elements in rows.
    typename Matrix::RowsCapacitiesType rowLengths( rows );
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
@@ -963,7 +963,7 @@ void test_RowsReduction()
    // Compute max norm
    TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
    auto rowSums_view = rowSums.getView();
-   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
       return abs( value );
    };
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
@@ -1136,62 +1136,4 @@ void test_SaveAndLoad( const char* filename )
    EXPECT_EQ( std::remove( filename ), 0 );
 }
 
-template< typename Matrix >
-void test_Print()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-
-   /*
-    * Sets up the following 5x4 sparse matrix:
-    *
-    *    /  1  1  1  0 \
-    *    |  0  0  0  1 |
-    *    |  1  1  1  0 |
-    *    |  0  1  1  1 |
-    *    \  0  0  1  1 /
-    */
-
-   const IndexType m_rows = 5;
-   const IndexType m_cols = 4;
-
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
-   m.setCompressedRowLengths( rowLengths );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-       m.setElement( 0, i, 1 );
-
-   m.setElement( 1, 3, 1 );      // 1st row
-
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-       m.setElement( 2, i, 1 );
-
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-       m.setElement( 3, i, 1 );
-
-   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-       m.setElement( 4, i, 1 );
-
-   std::stringstream printed;
-   std::stringstream couted;
-
-   //change the underlying buffer and save the old buffer
-   auto old_buf = std::cout.rdbuf(printed.rdbuf());
-
-   m.print( std::cout ); //all the std::cout goes to ss
-
-   std::cout.rdbuf(old_buf); //reset
-
-   couted << "Row: 0 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
-              "Row: 1 ->  Col:3->1\t\n"
-              "Row: 2 ->  Col:0->1	 Col:1->1	 Col:2->1\t\n"
-              "Row: 3 ->  Col:1->1	 Col:2->1	 Col:3->1\t\n"
-              "Row: 4 ->  Col:2->1	 Col:3->1\t\n";
-
-   EXPECT_EQ( printed.str(), couted.str() );
-}
-
 #endif
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
index a853281bef7b1bb9a3cb4985b6a3a53ba519ee45..ab072ab8ad9b4d2ff1237a25301022d424740f96 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -114,14 +114,6 @@ TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest )
 
     test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" );
 }
-
-TYPED_TEST( BinaryMatrixTest_CSR, printTest )
-{
-    using CSRMatrixType = typename TestFixture::CSRMatrixType;
-
-    test_Print< CSRMatrixType >();
-}
-
 #endif
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
index e532c9ff19d154eb86053a2897de7d1d3c9b6ff3..f8cd5f415ddaf07c188e67a7863c6a3930a95e6b 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
@@ -125,14 +125,6 @@ TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest )
 
     test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" );
 }
-
-TYPED_TEST( BinaryMatrixTest_Ellpack, printTest )
-{
-    using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
-
-    test_Print< EllpackMatrixType >();
-}
-
 #endif
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
index e332bc0109bf0d69242658859835fef72a432626..f58a018ac1685dd7ca1443017ed22d2ada1868a0 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
@@ -126,13 +126,6 @@ TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest )
     test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" );
 }
 
-TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest )
-{
-    using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
-
-    test_Print< SlicedEllpackMatrixType >();
-}
-
 #endif
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
index d311d774d7d06003cb7db510d8115f4eb977540b..0d502cf23d2697172c61a2bc79cc04d3dac8a151 100644
--- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
@@ -15,8 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/DenseMatrix.h>
-#include <TNL/Matrices/Tridiagonal.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -400,8 +400,8 @@ void tridiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
@@ -449,10 +449,10 @@ void multidiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
-   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
-   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+   using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType;
+   DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 };
 
    const IndexType rows( 10 ), columns( 10 );
    MultidiagonalHost hostMatrix( rows, columns, diagonals );
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index ceed58546e34fe6e91a421d19d142eae23d89784..627bcdf0f8a27779d92d97945f088232ba51f232 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -36,14 +36,16 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
 void test_GetSerializationType()
 {
    using namespace TNL::Containers::Segments;
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) );
+   // The expected strings name the element type, the index type and the storage order;
+   // the device is always reported as [any_device], so Host and Cuda matrices must agree.
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, RowMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, RowMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, ColumnMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, ColumnMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, ColumnMajorOrder >" ) );
+   EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, ColumnMajorOrder >" ) );
 }
 
 template< typename Matrix >
@@ -166,7 +168,7 @@ void test_GetCompressedRowLengths()
 }
 
 template< typename Matrix >
-void test_GetElementsCount()
+void test_GetAllocatedElementsCount()
 {
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
@@ -179,7 +181,7 @@ void test_GetElementsCount()
     m.reset();
     m.setDimensions( rows, cols );
 
-    EXPECT_EQ( m.getElementsCount(), 42 );
+    EXPECT_EQ( m.getAllocatedElementsCount(), 42 );
 }
 
 template< typename Matrix >
@@ -1365,11 +1367,11 @@ TYPED_TEST( MatrixTest, setElementsTest )
     test_SetElements< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getElementsCountTest )
+TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetElementsCount< MatrixType >();
+    test_GetAllocatedElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, getNonzeroElementsCountTest )
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.h b/src/UnitTests/Matrices/LambdaMatrixTest.h
index 07d1f336c4d5cc76c74bf19711fc4d2b68d3684b..cc2893d9bc0cc33d6385520a2b7d34e25c0b9907 100644
--- a/src/UnitTests/Matrices/LambdaMatrixTest.h
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.h
@@ -102,14 +102,6 @@ TYPED_TEST( LambdaMatrixTest, rowsReduction )
 
     test_RowsReduction< LambdaMatrixParametersType >();
 }
-
-TYPED_TEST( LambdaMatrixTest, printTest )
-{
-    using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
-
-    test_Print< LambdaMatrixParametersType >();
-}
-
 #endif
 
 #include "../main.h"
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.hpp b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
index 23963c11906431fa6f80926cb21c7d5d12913c51..256a84bf7fba0069a9130c92afed76e6249fe547 100644
--- a/src/UnitTests/Matrices/LambdaMatrixTest.hpp
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
@@ -94,10 +94,10 @@ void test_GetCompressedRowLengths()
 
    MatrixType m( size, size, matrixElements, rowLengths );
    TNL::Containers::Vector< IndexType > correctRowLengths{ 1, 3, 3, 3, 1 };
-   TNL::Containers::Vector< IndexType > rowLengthsVector;
+   TNL::Containers::Vector< IndexType, DeviceType > rowLengthsVector;
    m.getCompressedRowLengths( rowLengthsVector );
    for( int i = 0; i < size; i++ )
-      EXPECT_EQ( correctRowLengths[ i ], rowLengthsVector[ i ] );
+      EXPECT_EQ( correctRowLengths.getElement( i ), rowLengthsVector.getElement( i ) );
 }
 
 template< typename Matrix >
@@ -233,11 +233,11 @@ void test_RowsReduction()
    TNL::Containers::Vector< RealType, DeviceType, IndexType > v( size, -1.0 );
    auto vView = v.getView();
 
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType columnIdx, const RealType& value ) mutable -> RealType {
       return value;
    };
-   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
-      sum += value;
+   auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) -> RealType {
+      return sum + value;
    };
    auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       vView[ row ] = value;
@@ -251,56 +251,4 @@ void test_RowsReduction()
    EXPECT_EQ( v.getElement( 4 ),  1.0 );
 }
 
-template< typename Matrix >
-void test_Print()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-
-   IndexType size = 5;
-   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType {
-      if( rowIdx == 0 || rowIdx == size - 1 )
-         return 1;
-      return 3;
-   };
-
-   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) {
-      if( rowIdx == 0 || rowIdx == size -1 )
-      {
-         columnIdx = rowIdx;
-         value =  1.0;
-      }
-      else
-      {
-         columnIdx = rowIdx + localIdx - 1;
-         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0;
-      }
-   };
-
-   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) );
-
-   MatrixType m( size, size, matrixElements, rowLengths );
-
-   std::stringstream printed;
-   std::stringstream couted;
-
-   //change the underlying buffer and save the old buffer
-   auto old_buf = std::cout.rdbuf(printed.rdbuf());
-
-   
-   m.print( std::cout ); //all the std::cout goes to ss
-
-   std::cout.rdbuf(old_buf); //reset
-
-   couted << "Row: 0 ->  Col:0->1\t\n"
-             "Row: 1 ->  Col:0->1	 Col:1->-2	 Col:2->1\t\n"
-             "Row: 2 ->  Col:1->1	 Col:2->-2	 Col:3->1\t\n"
-             "Row: 3 ->  Col:2->1	 Col:3->-2	 Col:4->1\t\n"
-             "Row: 4 ->  Col:4->1\t\n";
-
-   EXPECT_EQ( printed.str(), couted.str() );
-}
-
-
 #endif // HAVE_GTEST
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index 8ee8c7ffb151d3d0f89f03af29941f5378384874..75aeda8245afb49744fc24fe63273993fdd50795 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -11,7 +11,7 @@
 #include <sstream>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
 #include <TNL/Containers/Array.h>
 
 #include <TNL/Containers/Vector.h>
@@ -19,11 +19,11 @@
 #include <TNL/Math.h>
 #include <iostream>
 
-using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >;
-using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >;
+using Multidiagonal_host_float = TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int >;
+using Multidiagonal_host_int = TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, int >;
 
-using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >;
-using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >;
+using Multidiagonal_cuda_float = TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int >;
+using Multidiagonal_cuda_int = TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, int >;
 
 static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl";
 
@@ -35,14 +35,14 @@ static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl";
 void test_GetSerializationType()
 {
    using namespace TNL::Containers::Segments;
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) );
 }
 
 template< typename Matrix >
@@ -51,14 +51,14 @@ void test_SetDimensions()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    const IndexType rows = 9;
    const IndexType cols = 8;
-   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
+   const DiagonalsOffsetsType diagonalsOffsets{ -3, -1, 0, 2, 4 };
 
    Matrix m;
-   m.setDimensions( rows, cols, diagonalsShifts );
+   m.setDimensions( rows, cols, diagonalsOffsets );
 
    EXPECT_EQ( m.getRows(), 9 );
    EXPECT_EQ( m.getColumns(), 8 );
@@ -71,17 +71,17 @@ void test_SetLike()
    using RealType = typename Matrix1::RealType;
    using DeviceType = typename Matrix1::DeviceType;
    using IndexType = typename Matrix1::IndexType;
-   using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix1::DiagonalsOffsetsType;
 
    const IndexType rows = 8;
    const IndexType cols = 7;
-   const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 };
+   const DiagonalsOffsetsType diagonalsOffsets{ -3, -1, 0, 2, 4 };
 
    Matrix1 m1;
-   m1.setDimensions( rows + 1, cols + 2, diagonalsShifts );
+   m1.setDimensions( rows + 1, cols + 2, diagonalsOffsets );
 
    Matrix2 m2;
-   m2.setDimensions( rows, cols, diagonalsShifts );
+   m2.setDimensions( rows, cols, diagonalsOffsets );
 
    m1.setLike( m2 );
 
@@ -90,54 +90,64 @@ void test_SetLike()
 }
 
 template< typename Matrix >
-void test_GetNonemptyRowsCount()
+void test_SetElements()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
 
-   /*
-    * Sets up the following 5x8 matrix:
-    *
-    *    /  1  0  0  1  0  1  0  0 \
-    *    |  0  1  0  0  1  0  1  0 |
-    *    |  1  0  1  0  0  1  0  1 |
-    *    |  0  1  0  1  0  0  1  0 |
-    *    \  0  0  1  0  1  0  0  1 /
-    */
-   Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
-   m1.setValue( 1.0 );
-   EXPECT_EQ( m1.getNonemptyRowsCount(), 5 );
-
-   /*
-    * Sets up the following 5x5 matrix:
-    *
-    *    /  1  0  0  1  0  \
-    *    |  0  1  0  0  1  |
-    *    |  1  0  1  0  0  |
-    *    |  0  1  0  1  0  |
-    *    \  0  0  1  0  1  /
-    */
-   Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
-   m2.setValue( 1.0 );
-   EXPECT_EQ( m2.getNonemptyRowsCount(), 5 );
-
-   /*
-    * Sets up the following 8x5 matrix:
-    *
-    *    /  1  0  0  1  0  \
-    *    |  0  1  0  0  1  |
-    *    |  1  0  1  0  0  |
-    *    |  0  1  0  1  0  |
-    *    |  0  0  1  0  1  |
-    *    |  0  0  0  1  0  |
-    *    |  0  0  0  0  1  |
-    *    \  0  0  0  0  0  /
-    */
-   Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
-   m3.setValue( 1.0 );
-   EXPECT_EQ( m3.getNonemptyRowsCount(), 7 );
+   // A 4 x 4 grid gives a 16 x 16 matrix with diagonals at offsets -gridSize, -1, 0, 1, gridSize.
+   const int gridSize = 4;
+   const int matrixSize = gridSize * gridSize;
+   Matrix matrix( matrixSize, matrixSize, { -gridSize, -1, 0, 1, gridSize } );
+   matrix.setElements( {
+      {  0.0,  0.0, 1.0 },                // row  0, boundary node
+      {  0.0,  0.0, 1.0 },                // row  1, boundary node
+      {  0.0,  0.0, 1.0 },                // row  2, boundary node
+      {  0.0,  0.0, 1.0 },                // row  3, boundary node
+      {  0.0,  0.0, 1.0 },                // row  4, boundary node
+      { -1.0, -1.0, 4.0, -1.0, -1.0 },    // row  5, inner node
+      { -1.0, -1.0, 4.0, -1.0, -1.0 },    // row  6, inner node
+      {  0.0,  0.0, 1.0 },                // row  7, boundary node
+      {  0.0,  0.0, 1.0 },                // row  8, boundary node
+      { -1.0, -1.0, 4.0, -1.0, -1.0 },    // row  9, inner node
+      { -1.0, -1.0, 4.0, -1.0, -1.0 },    // row 10, inner node
+      {  0.0,  0.0, 1.0 },                // row 11, boundary node
+      {  0.0,  0.0, 1.0 },                // row 12, boundary node
+      {  0.0,  0.0, 1.0 },                // row 13, boundary node
+      {  0.0,  0.0, 1.0 },                // row 14, boundary node
+      {  0.0,  0.0, 1.0 }                 // row 15, boundary node
+   } );
+
+   // Every entry is checked: a row of a boundary grid node holds only a unit diagonal,
+   // a row of an inner grid node holds 4 on the diagonal and -1 at the four other offsets.
+   for( int i = 0; i < gridSize; i++ )
+      for( int j = 0; j < gridSize; j++ )
+      {
+         const int elementIdx = i * gridSize + j;
+         const bool boundaryNode = ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 );
+         for( int k = 0; k < matrixSize; k++ )
+         {
+            const int offset = k - elementIdx;
+            const bool onDiagonal = ( offset == 0 );
+            const bool offDiagonalEntry =
+               ( offset == -gridSize || offset == -1 ||
+                 offset == 1 || offset == gridSize );
+            // The expected value stays a double, which is the type of the
+            // floating-point literals the matrix entries are compared with.
+            double expected = 0.0;
+            if( boundaryNode )
+            {
+               if( onDiagonal )
+                  expected = 1.0;
+            }
+            else if( offDiagonalEntry )
+               expected = -1.0;
+            else if( onDiagonal )
+               expected = 4.0;
+            EXPECT_EQ( matrix.getElement( elementIdx, k ), expected );
+         }
+      }
 }
 
 template< typename Matrix >
@@ -146,7 +156,7 @@ void test_GetCompressedRowLengths()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 8x8 matrix:
@@ -164,7 +174,7 @@ void test_GetCompressedRowLengths()
    const IndexType rows = 8;
    const IndexType cols = 8;
 
-   Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) );
+   Matrix m( rows, cols, DiagonalsOffsetsType({ -2, 0, 3, 5 }) );
    m.setValue( 1.0 );
    m.setElement( 0, 0, 0.0 );
    m.setElement( 7, 7, 0.0 );
@@ -182,28 +192,28 @@ void test_GetAllocatedElementsCount()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   Matrix m1( 7, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) );
    EXPECT_EQ( m1.getAllocatedElementsCount(), 28 );
 
-   Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   Matrix m2( 8, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) );
    EXPECT_EQ( m2.getAllocatedElementsCount(), 32 );
 
-   Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) );
+   Matrix m3( 9, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) );
    EXPECT_EQ( m3.getAllocatedElementsCount(), 32 );
 }
 
 template< typename Matrix >
-void test_GetNumberOfNonzeroMatrixElements()
+void test_GetNonzeroElementsCount()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 7x6 matrix:
@@ -221,11 +231,11 @@ void test_GetNumberOfNonzeroMatrixElements()
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   Matrix m( rows, cols, DiagonalsOffsetsType( { -3, 0, 2, 4 } ) );
    m.setValue( 1.0 );
    m.setElement( 0, 0, 0.0 );
 
-   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
+   EXPECT_EQ( m.getNonzeroElementsCount(), 15 );
 }
 
 template< typename Matrix >
@@ -234,7 +244,7 @@ void test_Reset()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -248,7 +258,7 @@ void test_Reset()
    const IndexType rows = 5;
    const IndexType cols = 4;
 
-   Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) );
+   Matrix m( rows, cols, DiagonalsOffsetsType( { 0, 1, 2, 4 } ) );
    m.reset();
 
    EXPECT_EQ( m.getRows(), 0 );
@@ -261,7 +271,7 @@ void test_SetValue()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 7x6 matrix:
@@ -277,7 +287,7 @@ void test_SetValue()
    const IndexType rows = 7;
    const IndexType cols = 6;
 
-   Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) );
+   Matrix m( rows, cols, DiagonalsOffsetsType( { -3, 0, 2, 4 } ) );
    m.setValue( 1.0 );
 
    EXPECT_EQ( m.getElement( 0, 0 ), 1 );
@@ -336,7 +346,7 @@ void test_SetElement()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x5 matrix:
@@ -349,7 +359,7 @@ void test_SetElement()
     */
    const IndexType rows = 5;
    const IndexType cols = 5;
-   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   DiagonalsOffsetsType diagonals{-3, 0, 1, 4 };
    Matrix m( rows, cols, diagonals );
 
    RealType value = 1;
@@ -399,7 +409,7 @@ void test_AddElement()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x5 matrix:
@@ -412,7 +422,7 @@ void test_AddElement()
     */
    const IndexType rows = 5;
    const IndexType cols = 5;
-   DiagonalsShiftsType diagonals{-3, 0, 1, 4 };
+   DiagonalsOffsetsType diagonals{-3, 0, 1, 4 };
    Matrix m( rows, cols, diagonals );
 
    RealType value = 1;
@@ -520,7 +530,7 @@ void test_SetRow()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x7 matrix:
@@ -534,7 +544,7 @@ void test_SetRow()
    const IndexType rows = 5;
    const IndexType cols = 7;
 
-   Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) );
+   Matrix m( rows, cols, DiagonalsOffsetsType({ -1, 0, 2, 4 }) );
 
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
@@ -597,7 +607,7 @@ void test_AddRow()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 6x5 matrix:
@@ -612,7 +622,7 @@ void test_AddRow()
 
    const IndexType rows = 6;
    const IndexType cols = 5;
-   DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } );
+   DiagonalsOffsetsType diagonals( { -2, 0, 1, 2 } );
 
    Matrix m( rows, cols, diagonals );
 
@@ -736,7 +746,7 @@ void test_VectorProduct()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -749,7 +759,7 @@ void test_VectorProduct()
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
-   DiagonalsShiftsType diagonals{ -2, 0, 2 };
+   DiagonalsOffsetsType diagonals{ -2, 0, 2 };
 
    Matrix m( rows, cols, diagonals );
 
@@ -785,8 +795,8 @@ void test_AddMatrix()
    using RealType = typename Matrix1::RealType;
    using DeviceType = typename Matrix1::DeviceType;
    using IndexType = typename Matrix1::IndexType;
-   using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType;
-   using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType;
+   using DiagonalsOffsetsType1 = typename Matrix1::DiagonalsOffsetsType;
+   using DiagonalsOffsetsType2 = typename Matrix2::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 5x4 matrix:
@@ -799,8 +809,8 @@ void test_AddMatrix()
     */
    const IndexType rows = 5;
    const IndexType cols = 4;
-   DiagonalsShiftsType1 diagonals1;
-   DiagonalsShiftsType2 diagonals2;
+   DiagonalsOffsetsType1 diagonals1;
+   DiagonalsOffsetsType2 diagonals2;
 
    Matrix1 m( rows, cols, diagonals1 );
 
@@ -908,7 +918,7 @@ void test_GetMatrixProduct()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+    using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 /*
  * Sets up the following 5x4 matrix:
  *
@@ -920,9 +930,9 @@ void test_GetMatrixProduct()
  */
     const IndexType leftRows = 5;
     const IndexType leftCols = 4;
-    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+    DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } );
 
-    Matrix leftMatrix( leftRows, leftCols, diagonalsShifts );
+    Matrix leftMatrix( leftRows, leftCols, diagonalsOffsets );
 
     RealType value = 1;
     for( IndexType i = 0; i < leftRows; i++ )
@@ -959,7 +969,7 @@ void test_GetMatrixProduct()
  *    \  0  0  0  0 /
  */
 
-    Matrix mResult( leftRows, rightCols, diagonalsShifts );
+    Matrix mResult( leftRows, rightCols, diagonalsOffsets );
     mResult.setValue( 0 );
 
     RealType leftMatrixMultiplicator = 1;
@@ -1011,7 +1021,7 @@ void test_GetTransposition()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+    using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 /*
  * Sets up the following 3x2 matrix:
  *
@@ -1021,9 +1031,9 @@ void test_GetTransposition()
  */
     const IndexType rows = 3;
     const IndexType cols = 2;
-    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+    DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } );
 
-    Matrix m( rows, cols, diagonalsShifts );
+    Matrix m( rows, cols, diagonalsOffsets );
 
     RealType value = 1;
     for( IndexType i = 0; i < rows; i++ )
@@ -1038,7 +1048,7 @@ void test_GetTransposition()
  *    /  0  0  0 \
  *    \  0  0  0 /
  */
-    Matrix mTransposed( cols, rows, diagonalsShifts );
+    Matrix mTransposed( cols, rows, diagonalsOffsets );
 
     mTransposed.print( std::cout );
 
@@ -1071,7 +1081,7 @@ void test_PerformSORIteration()
     using RealType = typename Matrix::RealType;
     using DeviceType = typename Matrix::DeviceType;
     using IndexType = typename Matrix::IndexType;
-    using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+    using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 /*
  * Sets up the following 4x4 matrix:
  *
@@ -1082,9 +1092,9 @@ void test_PerformSORIteration()
  */
     const IndexType rows = 4;
     const IndexType cols = 4;
-    DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } );
+    DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } );
 
-    Matrix m( rows, cols, diagonalsShifts );
+    Matrix m( rows, cols, diagonalsOffsets );
 
     m.setElement( 0, 0, 4.0 );        // 0th row
     m.setElement( 0, 1, 1.0 );
@@ -1147,36 +1157,36 @@ void test_AssignmentOperator()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
    constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization();
 
-   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, organization >;
-   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType,
+   using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType, organization >;
+   using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType,
       organization == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
-   DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } );
-   MultidiagonalHost hostMatrix( rows, columns, diagonalsShifts );
+   DiagonalsOffsetsType diagonalsOffsets( { -4, -2, 0, 2, 3, 5 } );
+   MultidiagonalHost hostMatrix( rows, columns, diagonalsOffsets );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j <  columns; j++ )
-         if( diagonalsShifts.containsValue( j - i ) )
+         if( diagonalsOffsets.containsValue( j - i ) )
             hostMatrix.setElement( i, j,  i + j );
 
-   Matrix matrix( rows, columns, diagonalsShifts );
+   Matrix matrix( rows, columns, diagonalsOffsets );
    matrix.getValues() = 0.0;
    matrix = hostMatrix;
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
-            if( diagonalsShifts.containsValue( j - i ) )
+            if( diagonalsOffsets.containsValue( j - i ) )
                EXPECT_EQ( matrix.getElement( i, j ), i + j );
             else
                EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
 
 #ifdef HAVE_CUDA
-   MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts );
+   MultidiagonalCuda cudaMatrix( rows, columns, diagonalsOffsets );
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
-         if( diagonalsShifts.containsValue( j - i ) )
+         if( diagonalsOffsets.containsValue( j - i ) )
             cudaMatrix.setElement( i, j, i + j );
 
    matrix.getValues() = 0.0;
@@ -1184,7 +1194,7 @@ void test_AssignmentOperator()
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < columns; j++ )
       {
-         if( diagonalsShifts.containsValue( j - i ) )
+         if( diagonalsOffsets.containsValue( j - i ) )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
          else
             EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
@@ -1199,7 +1209,7 @@ void test_SaveAndLoad()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
+   using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType;
 
    /*
     * Sets up the following 4x4 matrix:
@@ -1211,15 +1221,15 @@ void test_SaveAndLoad()
     */
    const IndexType rows = 4;
    const IndexType cols = 4;
-   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
+   DiagonalsOffsetsType diagonalsOffsets( { -1, 0, 1 } );
 
-   Matrix savedMatrix( rows, cols, diagonalsShifts );
+   Matrix savedMatrix( rows, cols, diagonalsOffsets );
 
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++ )
       {
-         if( diagonalsShifts.containsValue( j - i ) )
+         if( diagonalsOffsets.containsValue( j - i ) )
             savedMatrix.setElement( i, j, value );
          value++;
       }
@@ -1271,56 +1281,6 @@ void test_SaveAndLoad()
    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
 }
 
-template< typename Matrix >
-void test_Print()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-   using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType;
-
-   /*
-    * Sets up the following 5x4 sparse matrix:
-    *
-    *    /  1  2  0  0 \
-    *    |  5  6  7  0 |
-    *    |  0 10 11 12 |
-    *    |  0  0 15 16 |
-    *    \  0  0  0 20 /
-    */
-   const IndexType rows = 5;
-   const IndexType cols = 4;
-   DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } );
-
-   Matrix m( rows, cols, diagonalsShifts );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < rows; i++)
-      for( IndexType j = 0; j < cols; j++)
-      {
-         if( abs( i - j ) <= 1 )
-            m.setElement( i, j, value );
-         value++;
-      }
-
-   std::stringstream printed;
-   std::stringstream couted;
-
-   //change the underlying buffer and save the old buffer
-   auto old_buf = std::cout.rdbuf(printed.rdbuf());
-
-   m.print( std::cout ); //all the std::cout goes to ss
-
-   std::cout.rdbuf(old_buf); //reset
-   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
-             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
-             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
-             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
-             "Row: 4 ->  Col:3->20\t\n";
-
-   EXPECT_EQ( printed.str(), couted.str() );
-}
-
 // test fixture for typed tests
 template< typename Matrix >
 class MatrixTest : public ::testing::Test
@@ -1332,31 +1292,31 @@ protected:
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >,
-    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >,
-    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long >
+    TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, short >,
+    TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, int >,
+    TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-    ,TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::Multidiagonal< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::Multidiagonal< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::Multidiagonal< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long >
+    ,TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::MultidiagonalMatrix< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::MultidiagonalMatrix< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::MultidiagonalMatrix< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, long >
 #endif
 >;
 
@@ -1381,14 +1341,13 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getNonemptyRowsCountTest )
+TYPED_TEST( MatrixTest, setElements )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNonemptyRowsCount< MatrixType >();
+    test_SetElements< MatrixType >();
 }
 
-
 TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1407,7 +1366,7 @@ TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+    test_GetNonzeroElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, resetTest )
@@ -1480,13 +1439,6 @@ TYPED_TEST( MatrixTest, saveAndLoadTest )
     test_SaveAndLoad< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, printTest )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    test_Print< MatrixType >();
-}
-
 /*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host )
 {
     bool testRan = false;
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 6f8a142a673f6e206d6fa589145706c55aed3ec5..ffcca4b606c9d665c9442dd0488d447a73adef30 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -15,8 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/DenseMatrix.h>
-#include <TNL/Matrices/Tridiagonal.h>
-#include <TNL/Matrices/Multidiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -436,8 +436,8 @@ void tridiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
@@ -486,10 +486,10 @@ void multidiagonalMatrixAssignment()
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
 
-   using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >;
-   using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType;
-   DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 };
+   using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
+   using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >;
+   using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType;
+   DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 };
 
    const IndexType rows( 10 ), columns( 10 );
    MultidiagonalHost hostMatrix( rows, columns, diagonals );
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index a00e696871b4393672cc6c2475fb961f08187b8d..98e779daea91af6f521af04fbc402706eed14d8f 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -108,11 +108,4 @@ TYPED_TEST( MatrixTest, saveAndLoadTest )
 
     test_SaveAndLoad< MatrixType >( saveAndLoadFileName );
 }
-
-TYPED_TEST( MatrixTest, printTest )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    test_Print< MatrixType >();
-}
 #endif
\ No newline at end of file
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 6d7c6436058cec31efa1fe4e64cc4e27238a67a5..38abdf07e0f105fee9503703578cb846983b5158 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -320,7 +320,7 @@ void test_SetLike()
 }
 
 template< typename Matrix >
-void test_GetNumberOfNonzeroMatrixElements()
+void test_GetNonzeroElementsCount()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -369,7 +369,7 @@ void test_GetNumberOfNonzeroMatrixElements()
       for( IndexType i = 0; i < cols; i++ )
          m.setElement( j, i, value++ );
 
-   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 );
+   EXPECT_EQ( m.getNonzeroElementsCount(), 41 );
 }
 
 template< typename Matrix >
@@ -1374,7 +1374,7 @@ void test_RowsReduction()
    // Compute number of non-zero elements in rows.
    typename Matrix::RowsCapacitiesType rowLengths( rows );
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
@@ -1389,7 +1389,7 @@ void test_RowsReduction()
    // Compute max norm
    TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
    auto rowSums_view = rowSums.getView();
-   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
       return abs( value );
    };
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
@@ -1557,62 +1557,4 @@ void test_SaveAndLoad( const char* filename )
    EXPECT_EQ( std::remove( filename ), 0 );
 }
 
-template< typename Matrix >
-void test_Print()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-
-   /*
-    * Sets up the following 5x4 sparse matrix:
-    *
-    *    /  1  2  3  0 \
-    *    |  0  0  0  4 |
-    *    |  5  6  7  0 |
-    *    |  0  8  9 10 |
-    *    \  0  0 11 12 /
-    */
-
-   const IndexType m_rows = 5;
-   const IndexType m_cols = 4;
-
-   Matrix m( m_rows, m_cols );
-   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
-   m.setCompressedRowLengths( rowLengths );
-
-   RealType value = 1;
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
-      m.setElement( 0, i, value++ );
-
-   m.setElement( 1, 3, value++ );                // 1st row
-
-   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
-      m.setElement( 2, i, value++ );
-
-   for( IndexType i = 1; i < m_cols; i++ )       // 3rd row
-      m.setElement( 3, i, value++ );
-
-   for( IndexType i = 2; i < m_cols; i++ )       // 4th row
-      m.setElement( 4, i, value++ );
-
-   std::stringstream printed;
-   std::stringstream couted;
-
-   //change the underlying buffer and save the old buffer
-   auto old_buf = std::cout.rdbuf(printed.rdbuf());
-
-   m.print( std::cout ); //all the std::cout goes to ss
-
-   std::cout.rdbuf(old_buf); //reset
-
-   couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3\t\n"
-             "Row: 1 ->  Col:3->4\t\n"
-             "Row: 2 ->  Col:0->5	 Col:1->6	 Col:2->7\t\n"
-             "Row: 3 ->  Col:1->8	 Col:2->9	 Col:3->10\t\n"
-             "Row: 4 ->  Col:2->11	 Col:3->12\t\n";
-
-   EXPECT_EQ( printed.str(), couted.str() );
-}
-
 #endif
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index 02fd8c585366f4da12d1218a28adca717dd2cdf2..3c40937851780fff92b6a3a6c33cc801310d00c3 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -45,11 +45,11 @@ TYPED_TEST( MatrixTest, setLikeTest )
     test_SetLike< MatrixType, MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements )
+TYPED_TEST( MatrixTest, getNonzeroElementsCount )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+    test_GetNonzeroElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, resetTest )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 4e28842ba066ea5f794d8a279dacda09fbad1a85..c316440d1719e67e0a3e612963df4e7e93caeb48 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -163,7 +163,7 @@ void test_SetLike()
 }
 
 template< typename Matrix >
-void test_GetNumberOfNonzeroMatrixElements()
+void test_GetNonzeroElementsCount()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -204,7 +204,7 @@ void test_GetNumberOfNonzeroMatrixElements()
                                   { 10, 2, 28 },                              { 10, 4, 29 },                                                      { 10, 10, 30 }
    } );
 
-   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 );
+   EXPECT_EQ( m.getNonzeroElementsCount(), 49 );
 }
 
 template< typename Matrix >
@@ -898,7 +898,7 @@ void test_RowsReduction()
    typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } );
    auto rowLengths_view = rowLengths.getView();
    rowLengths_view = 0;
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) mutable -> IndexType {
       if( value != 0.0 && row != column)
          TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 );
       return ( value != 0.0 );
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index bb613a17db750954c0310ade3beb830b6b745108..4c68db45a89334119494db0832a3d53e76dcd93e 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -11,7 +11,7 @@
 #include <sstream>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
-#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrix.h>
 #include <TNL/Containers/Array.h>
 
 #include <TNL/Containers/Vector.h>
@@ -20,11 +20,11 @@
 #include <TNL/Math.h>
 #include <iostream>
 
-using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >;
-using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >;
+using Tridiagonal_host_float = TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int >;
+using Tridiagonal_host_int = TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, int >;
 
-using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >;
-using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >;
+using Tridiagonal_cuda_float = TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int >;
+using Tridiagonal_cuda_int = TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, int >;
 
 static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl";
 
@@ -36,14 +36,14 @@ static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl";
 void test_GetSerializationType()
 {
    using namespace TNL::Containers::Segments;
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) );
-   EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int,   TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, true, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int,   TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, false, [any_allocator] >" ) );
+   EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int,   TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, false, [any_allocator] >" ) );
 }
 
 template< typename Matrix >
@@ -139,28 +139,6 @@ void test_GetCompressedRowLengths()
    EXPECT_EQ( rowLengths, correctRowLengths );
 }
 
-template< typename Matrix >
-void test_GetRowLength()
-{
-   using RealType = typename Matrix::RealType;
-   using DeviceType = typename Matrix::DeviceType;
-   using IndexType = typename Matrix::IndexType;
-
-   const IndexType rows = 8;
-   const IndexType cols = 7;
-
-   Matrix m( rows, cols );
-
-   EXPECT_EQ( m.getRowLength( 0 ), 2 );
-   EXPECT_EQ( m.getRowLength( 1 ), 3 );
-   EXPECT_EQ( m.getRowLength( 2 ), 3 );
-   EXPECT_EQ( m.getRowLength( 3 ), 3 );
-   EXPECT_EQ( m.getRowLength( 4 ), 3 );
-   EXPECT_EQ( m.getRowLength( 5 ), 3 );
-   EXPECT_EQ( m.getRowLength( 6 ), 2 );
-   EXPECT_EQ( m.getRowLength( 7 ), 1 );
-}
-
 template< typename Matrix >
 void test_GetAllocatedElementsCount()
 {
@@ -177,7 +155,7 @@ void test_GetAllocatedElementsCount()
 }
 
 template< typename Matrix >
-void test_GetNumberOfNonzeroMatrixElements()
+void test_GetNonzeroElementsCount()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -206,7 +184,7 @@ void test_GetNumberOfNonzeroMatrixElements()
 
    m.setElement( 5, 5, 0);
 
-   EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 );
+   EXPECT_EQ( m.getNonzeroElementsCount(), 15 );
 }
 
 template< typename Matrix >
@@ -579,14 +557,12 @@ void test_SetRow()
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 3 ][ 3 ] {
-         {  1,  2,  0 },
+         {  0,  1,  2 },
          {  8,  9, 10 },
          { 16, 17, 18 } };
       auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 3; i++ )
       {
-         if( rowIdx == 0 && i > 1 )
-            break;
          row.setElement( i, values[ rowIdx ][ i ] );
       }
    };
@@ -700,7 +676,7 @@ void test_AddRow()
    auto matrix_view = m.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       RealType values[ 6 ][ 3 ] {
-         { 11, 11,  0 },
+         {  0, 11, 11 },
          { 22, 22, 22 },
          { 33, 33, 33 },
          { 44, 44, 44 },
@@ -1170,8 +1146,8 @@ void test_AssignmentOperator()
    using IndexType = typename Matrix::IndexType;
    constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization();
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, organization >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType,
+   using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType, organization >;
+   using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType,
       organization == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
@@ -1347,31 +1323,31 @@ protected:
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, short >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, short >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, short >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >,
-    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, int >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, int >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, int >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >,
-    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Host, long >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Host, long >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Host, long >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long >
+    TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Host, short >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Host, short >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Host, short >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, short >,
+    TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Host, int >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Host, int >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Host, int >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, int >,
+    TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Host, long >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Host, long >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Host, long >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, long >
 #ifdef HAVE_CUDA
-    ,TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, short >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, short >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, short >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >,
-    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, int >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, int >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, int >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >,
-    TNL::Matrices::Tridiagonal< int,    TNL::Devices::Cuda, long >,
-    TNL::Matrices::Tridiagonal< long,   TNL::Devices::Cuda, long >,
-    TNL::Matrices::Tridiagonal< float,  TNL::Devices::Cuda, long >,
-    TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long >
+    ,TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Cuda, short >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Cuda, short >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Cuda, short >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, short >,
+    TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Cuda, int >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Cuda, int >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Cuda, int >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, int >,
+    TNL::Matrices::TridiagonalMatrix< int,    TNL::Devices::Cuda, long >,
+    TNL::Matrices::TridiagonalMatrix< long,   TNL::Devices::Cuda, long >,
+    TNL::Matrices::TridiagonalMatrix< float,  TNL::Devices::Cuda, long >,
+    TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, long >
 #endif
 >;
 
@@ -1403,13 +1379,6 @@ TYPED_TEST( MatrixTest, getCompressedRowLengthTest )
     test_GetCompressedRowLengths< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getRowLengthTest )
-{
-    using MatrixType = typename TestFixture::MatrixType;
-
-    test_GetRowLength< MatrixType >();
-}
-
 TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
@@ -1417,11 +1386,11 @@ TYPED_TEST( MatrixTest, getAllocatedElementsCountTest )
     test_GetAllocatedElementsCount< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest )
+TYPED_TEST( MatrixTest, getNonzeroElementsCountTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_GetNumberOfNonzeroMatrixElements< MatrixType >();
+    test_GetNonzeroElementsCount< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, resetTest )
@@ -1488,7 +1457,7 @@ TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
     using DeviceType = typename MatrixType::DeviceType;
     using IndexType = typename MatrixType::IndexType;
     using RealAllocatorType = typename MatrixType::RealAllocatorType;
-    using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType,
+    using MatrixType2 = TNL::Matrices::TridiagonalMatrix< RealType, DeviceType, IndexType,
      MatrixType::getOrganization() == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder,
       RealAllocatorType >;