diff --git a/Documentation/Examples/Algorithms/ParallelForExample.cu b/Documentation/Examples/Algorithms/ParallelForExample.cu
index d63e711834f7e96a58f659f34542c9ac03458148..316ea3cb98c305ae3fa0b1114edc99f44fa7bebb 100644
--- a/Documentation/Examples/Algorithms/ParallelForExample.cu
+++ b/Documentation/Examples/Algorithms/ParallelForExample.cu
@@ -36,7 +36,7 @@ int main( int argc, char* argv[] )
     */
    Vector< double, Devices::Host > host_v1( 10 ), host_v2( 10 ), host_result( 10 );
    host_v1 = 1.0;
-   host_v2.forEachElement( []__cuda_callable__ ( int i, double& v ) { v = i; } );
+   host_v2.forAllElements( []__cuda_callable__ ( int i, double& v ) { v = i; } );
    vectorSum( host_v1, host_v2, 2.0, host_result );
    std::cout << "host_v1 = " << host_v1 << std::endl;
    std::cout << "host_v2 = " << host_v2 << std::endl;
@@ -48,7 +48,7 @@ int main( int argc, char* argv[] )
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_v1( 10 ), cuda_v2( 10 ), cuda_result( 10 );
    cuda_v1 = 1.0;
-   cuda_v2.forEachElement( []__cuda_callable__ ( int i, double& v ) { v = i; } );
+   cuda_v2.forAllElements( []__cuda_callable__ ( int i, double& v ) { v = i; } );
    vectorSum( cuda_v1, cuda_v2, 2.0, cuda_result );
    std::cout << "cuda_v1 = " << cuda_v1 << std::endl;
    std::cout << "cuda_v2 = " << cuda_v2 << std::endl;
diff --git a/Documentation/Examples/Containers/ArrayExample_forElements.cpp b/Documentation/Examples/Containers/ArrayExample_forElements.cpp
index ba29b8361c3966e308c94fe7790d668cc06ca70c..dd58f87c7f30377e679e01e561b8e3f0c684f756 100644
--- a/Documentation/Examples/Containers/ArrayExample_forElements.cpp
+++ b/Documentation/Examples/Containers/ArrayExample_forElements.cpp
@@ -17,7 +17,7 @@ void forElementsExample()
    /****
     * Initiate the elements of array `a`
     */
-   a.forEachElement( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Initiate elements of array `b` with indexes 0-4 using `a_view`
diff --git a/Documentation/Examples/Containers/ArrayExample_reduceElements.cpp b/Documentation/Examples/Containers/ArrayExample_reduceElements.cpp
index b847d0620b51a42beb25c5da6bc2fb797e6c8b1b..bdf9437321ce7b7f2ba5fae698259fa4e2de4c2e 100644
--- a/Documentation/Examples/Containers/ArrayExample_reduceElements.cpp
+++ b/Documentation/Examples/Containers/ArrayExample_reduceElements.cpp
@@ -17,7 +17,7 @@ void reduceElementsExample()
    /****
     * Initiate the elements of array `a`
     */
-   a.forEachElement( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Sum all elements of array `a`
diff --git a/Documentation/Examples/Containers/ArrayViewExample.cpp b/Documentation/Examples/Containers/ArrayViewExample.cpp
index 8103f8b3d32c82f589449cc1eae64b33382f2552..fdc1897c53bfc11075878672cf7167705b71b255 100644
--- a/Documentation/Examples/Containers/ArrayViewExample.cpp
+++ b/Documentation/Examples/Containers/ArrayViewExample.cpp
@@ -45,7 +45,7 @@ void arrayViewExample()
    ArrayType a3( size );
    ViewType a3_view = a3.getView();
    auto f1 = [] __cuda_callable__ ( IndexType i, int& value ) { value = 2 * i; };
-   a3_view.forEachElement( f1 );
+   a3_view.forAllElements( f1 );
 
    for( int i = 0; i < size; i++ )
       if( a3_view.getElement( i ) != 2 * i )
diff --git a/Documentation/Examples/Containers/ArrayViewExample_forElements.cpp b/Documentation/Examples/Containers/ArrayViewExample_forElements.cpp
index f01c2972fd9ccc803079cd51aedd54c06e2b030f..9a9966318ab030d5b8483a0e87da0e847fae6620 100644
--- a/Documentation/Examples/Containers/ArrayViewExample_forElements.cpp
+++ b/Documentation/Examples/Containers/ArrayViewExample_forElements.cpp
@@ -18,7 +18,7 @@ void forElementsExample()
     * Create an ArrayView and use it for initiation of elements of array `a`
     */
    auto a_view = a.getView();
-   a_view.forEachElement( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Initiate elements of array `b` with indexes 0-4 using `a_view`
diff --git a/Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp b/Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp
index ed767c7db5ae6c08dd1f9c35661ac0a18ffdc396..1357ac8d0acd0bb8df72b077876c214d4d749524 100644
--- a/Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp
+++ b/Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp
@@ -18,7 +18,7 @@ void reduceElementsExample()
    /****
     * Initiate the elements of array `a`
     */
-   a_view.forEachElement( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Sum all elements of array `a`
diff --git a/Documentation/Examples/Containers/CMakeLists.txt b/Documentation/Examples/Containers/CMakeLists.txt
index 158149e3be91f8905f3802f9b34c843f29d371ec..bd7d9b714744d82bcba48e27c2c38538c70e84f1 100644
--- a/Documentation/Examples/Containers/CMakeLists.txt
+++ b/Documentation/Examples/Containers/CMakeLists.txt
@@ -10,8 +10,8 @@ set( COMMON_EXAMPLES
 
 if( BUILD_CUDA )
    foreach( target IN ITEMS ${COMMON_EXAMPLES} )
-      cuda_add_executable( ${target} ${target}.cu OPTIONS )
-      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
       set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
    endforeach()
 else()
diff --git a/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt
index 0f87cdc6e14d7f51d81088ccb69f5e65b0df026a..e2814577681fa85d54b707b091f7d86fe7dcd49e 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt
@@ -1,283 +1,50 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list_cuda DenseMatrixExample_Constructor_init_list.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out
-                       OUTPUT DenseMatrixExample_Constructor_init_list.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElements_cuda DenseMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out
-                       OUTPUT DenseMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths_cuda DenseMatrixExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElementsCount_cuda DenseMatrixExample_getElementsCount.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out
-                       OUTPUT DenseMatrixExample_getElementsCount.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getConstRow_cuda DenseMatrixExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out
-                       OUTPUT DenseMatrixExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getRow_cuda DenseMatrixExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out
-                       OUTPUT DenseMatrixExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElement_cuda DenseMatrixExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out
-                       OUTPUT DenseMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_addElement_cuda DenseMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out
-                       OUTPUT DenseMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElement_cuda DenseMatrixExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out
-                       OUTPUT DenseMatrixExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_cuda DenseMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out
-                       OUTPUT DenseMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction_cuda DenseMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out
-                       OUTPUT DenseMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_forElements_cuda DenseMatrixExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forElements.out
-                       OUTPUT DenseMatrixExample_forElements.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_forEachElement_cuda DenseMatrixExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forEachElement.out
-                       OUTPUT DenseMatrixExample_forEachElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out
-                       OUTPUT DenseMatrixViewExample_constructor.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getCompressedRowLengths_cuda DenseMatrixViewExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixViewExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getElementsCount_cuda DenseMatrixViewExample_getElementsCount.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElementsCount_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElementsCount.out
-                       OUTPUT DenseMatrixViewExample_getElementsCount.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getConstRow_cuda DenseMatrixViewExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getConstRow.out
-                       OUTPUT DenseMatrixViewExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getRow_cuda DenseMatrixViewExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getRow.out
-                       OUTPUT DenseMatrixViewExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_setElement_cuda DenseMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_setElement.out
-                       OUTPUT DenseMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_addElement_cuda DenseMatrixViewExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_addElement.out
-                       OUTPUT DenseMatrixViewExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getElement_cuda DenseMatrixViewExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElement.out
-                       OUTPUT DenseMatrixViewExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_rowsReduction_cuda DenseMatrixViewExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_rowsReduction.out
-                       OUTPUT DenseMatrixViewExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_allRowsReduction_cuda DenseMatrixViewExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_allRowsReduction.out
-                       OUTPUT DenseMatrixViewExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_forElements_cuda DenseMatrixViewExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forElements.out
-                       OUTPUT DenseMatrixViewExample_forElements.out )
+set( COMMON_EXAMPLES  # base names of the examples; the loops below append .cu (CUDA build) or .cpp (host build) to each
+    DenseMatrixExample_Constructor_init_list
+    DenseMatrixExample_setElements
+    DenseMatrixExample_getCompressedRowLengths
+    DenseMatrixExample_getElementsCount
+    DenseMatrixExample_getConstRow
+    DenseMatrixExample_getRow
+    DenseMatrixExample_setElement
+    DenseMatrixExample_addElement
+    DenseMatrixExample_getElement
+    DenseMatrixExample_reduceRows
+    DenseMatrixExample_reduceAllRows
+    DenseMatrixExample_forElements
+    DenseMatrixExample_forAllElements
+    DenseMatrixExample_forRows
+    DenseMatrixViewExample_constructor
+    DenseMatrixViewExample_getCompressedRowLengths
+    DenseMatrixViewExample_getElementsCount
+    DenseMatrixViewExample_getConstRow
+    DenseMatrixViewExample_getRow
+    DenseMatrixViewExample_setElement
+    DenseMatrixViewExample_addElement
+    DenseMatrixViewExample_getElement
+    DenseMatrixViewExample_reduceRows
+    DenseMatrixViewExample_reduceAllRows
+    DenseMatrixViewExample_forElements
+    DenseMatrixViewExample_forRows
+    DenseMatrixViewExample_forAllElements
+)
 
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_forEachElement_cuda DenseMatrixViewExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forEachElement.out
-                       OUTPUT DenseMatrixViewExample_forEachElement.out )
+if( BUILD_CUDA )  # CUDA build: every example is compiled from <name>.cu into an executable called <name>-cuda
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )  # run the example and redirect its stdout into the snippets directory
+      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )  # collect the declared outputs of all CUDA examples
+   endforeach()
+else()  # host-only build: every example is compiled from <name>.cpp into an executable called <name>
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )  # run the example and redirect its stdout into the snippets directory
+      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )  # collect the declared outputs of all host examples
+   endforeach()
+endif()
 
+IF( BUILD_CUDA )
+   ADD_CUSTOM_TARGET( RunDenseMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-   ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out
-                       OUTPUT DenseMatrixExample_Constructor_init_list.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_setElements DenseMatrixExample_setElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out
-                       OUTPUT DenseMatrixExample_setElements.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths DenseMatrixExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getElementsCount DenseMatrixExample_getElementsCount.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out
-                       OUTPUT DenseMatrixExample_getElementsCount.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getConstRow DenseMatrixExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out
-                       OUTPUT DenseMatrixExample_getConstRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getRow DenseMatrixExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out
-                       OUTPUT DenseMatrixExample_getRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_setElement DenseMatrixExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out
-                       OUTPUT DenseMatrixExample_setElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_addElement DenseMatrixExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out
-                       OUTPUT DenseMatrixExample_addElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_getElement DenseMatrixExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out
-                       OUTPUT DenseMatrixExample_getElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_rowsReduction DenseMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out
-                       OUTPUT DenseMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction DenseMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out
-                       OUTPUT DenseMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_forElements DenseMatrixExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forElements.out
-                       OUTPUT DenseMatrixExample_forElements.out )
-
-   ADD_EXECUTABLE( DenseMatrixExample_forEachElement DenseMatrixExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forEachElement.out
-                       OUTPUT DenseMatrixExample_forEachElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out
-                       OUTPUT DenseMatrixViewExample_constructor.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_getCompressedRowLengths DenseMatrixViewExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT DenseMatrixViewExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_getElementsCount DenseMatrixViewExample_getElementsCount.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElementsCount >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElementsCount.out
-                       OUTPUT DenseMatrixViewExample_getElementsCount.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_getConstRow DenseMatrixViewExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getConstRow.out
-                       OUTPUT DenseMatrixViewExample_getConstRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_getRow DenseMatrixViewExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getRow.out
-                       OUTPUT DenseMatrixViewExample_getRow.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_setElement DenseMatrixViewExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_setElement.out
-                       OUTPUT DenseMatrixViewExample_setElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_addElement DenseMatrixViewExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_addElement.out
-                       OUTPUT DenseMatrixViewExample_addElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_getElement DenseMatrixViewExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElement.out
-                       OUTPUT DenseMatrixViewExample_getElement.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_rowsReduction DenseMatrixViewExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_rowsReduction.out
-                       OUTPUT DenseMatrixViewExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_allRowsReduction DenseMatrixViewExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_allRowsReduction.out
-                       OUTPUT DenseMatrixViewExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_forElements DenseMatrixViewExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forElements.out
-                       OUTPUT DenseMatrixViewExample_forElements.out )
-
-   ADD_EXECUTABLE( DenseMatrixViewExample_forEachElement DenseMatrixViewExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forEachElement.out
-                       OUTPUT DenseMatrixViewExample_forEachElement.out )
-
+   ADD_CUSTOM_TARGET( RunDenseMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
 ENDIF()
 
-ADD_CUSTOM_TARGET( RunDenseMatricesExamples ALL DEPENDS
-   DenseMatrixExample_Constructor_init_list.out
-   DenseMatrixExample_setElements.out
-   DenseMatrixExample_getCompressedRowLengths.out
-   DenseMatrixExample_getElementsCount.out
-   DenseMatrixExample_getConstRow.out
-   DenseMatrixExample_getRow.out
-   DenseMatrixExample_setElement.out
-   DenseMatrixExample_addElement.out
-   DenseMatrixExample_getElement.out
-   DenseMatrixExample_rowsReduction.out
-   DenseMatrixExample_allRowsReduction.out
-   DenseMatrixExample_forElements.out
-   DenseMatrixExample_forEachElement.out
-   DenseMatrixViewExample_constructor.out
-   DenseMatrixViewExample_getCompressedRowLengths.out
-   DenseMatrixViewExample_getElementsCount.out
-   DenseMatrixViewExample_getConstRow.out
-   DenseMatrixViewExample_getRow.out
-   DenseMatrixViewExample_setElement.out
-   DenseMatrixViewExample_addElement.out
-   DenseMatrixViewExample_getElement.out
-   DenseMatrixViewExample_rowsReduction.out
-   DenseMatrixViewExample_allRowsReduction.out
-   DenseMatrixViewExample_forElements.out
-   DenseMatrixViewExample_forEachElement.out
-
-)
-
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu
deleted file mode 120000
index 70f517f68bde7c679d39e1e315879355f0f366a8..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cpp
similarity index 80%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cpp
index 8b205e824f2c9e42869b541ecddfd0ecd258137b..4fd7d3b4727e8c4aca8d7c066a88038b80b4a887 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
 
@@ -15,17 +15,17 @@ void forEachElementExample()
          value = rowIdx + columnIdx;
    };
 
-   matrix.forEachElement( f );
+   matrix.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..e2c1d6f8a1bf5fe623da145416e765c8b92ff9db
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllElements.cu
@@ -0,0 +1 @@
+DenseMatrixExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cu
deleted file mode 120000
index 8d658cfdbc0be5bb8e293fff1e7d40a63f9476b6..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea8c88484910d0e59bbc24e36bfd5a047ac8d568
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
@@ -0,0 +1,53 @@
+#include <iostream>
+#include <limits>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::DenseMatrix< double, Device >;
+   using RowView = typename MatrixType::RowView;
+   const int size = 5;
+   MatrixType matrix( size, size );
+
+   /***
+    * Fill every row: value rowIdx + 1 at position rowIdx, and -1 at positions rowIdx - 1 and rowIdx + 1 where the guards below allow it.
+    */
+   auto f = [=] __cuda_callable__ ( RowView& row ) mutable {
+      const int& rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )  // skipped in the first row, where rowIdx - 1 would be negative
+         row.setValue( rowIdx - 1, -1.0 );
+      row.setValue( rowIdx, rowIdx + 1.0 );
+      if( rowIdx < size - 1 )  // skipped in the last row, where rowIdx + 1 would reach size
+         row.setValue( rowIdx + 1, -1.0 );
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+
+   /***
+    * Divide each matrix row by its largest element. The row is traversed twice with range-based for loops: once to find the maximum, once to scale.
+    */
+   matrix.forAllRows( [=] __cuda_callable__ ( RowView& row ) mutable {
+      double largest = std::numeric_limits< double >::lowest();  // start from the lowest finite double so any element replaces it
+      for( auto element : row )  // first pass: find the largest value in the row
+         largest = TNL::max( largest, element.value() );
+      for( auto element : row )  // second pass: scale every value by the maximum found above
+         element.value() /= largest;
+   } );
+   std::cout << "Divide each matrix row by its largest element... " << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )  // runs the example on the host and, if enabled, on CUDA; argc/argv are not used
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA  // the CUDA instantiation is compiled in only when HAVE_CUDA is defined
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif  // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f97a66ee329635c4522ad123e16e3a173f5d8884
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu
@@ -0,0 +1 @@
+DenseMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
index c61a1c8221a39073099300ddd569b034104b52f9..2e05b167870b4c4cd33dbd3f26fe6d0bdab2c83f 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp
@@ -23,7 +23,7 @@ void getRowExample()
     */
    auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
       auto row = matrix->getRow( rowIdx );
-      return row.getElement( rowIdx );
+      return row.getValue( rowIdx );
    };
 
    /***
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
index 0cf1e1d76d938a32b95b8702d659d47025851998..615289066b5400b33a3e10307039035aa3497f74 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp
@@ -13,7 +13,7 @@ void getRowExample()
 
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
       auto row = matrix->getRow( rowIdx );
-      row.setElement( rowIdx, 10 * ( rowIdx + 1 ) );
+      row.setValue( rowIdx, 10 * ( rowIdx + 1 ) );
    };
 
    /***
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cpp
index ce323671fc86e77ae19c513487b41148afc9cf84..2f63d7f05ef03e5df1939cb5c555ffd766175ea5 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -49,7 +49,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
 }
@@ -57,10 +57,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "All rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "All rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..94d48afbbd8854c910cd09bacfca2c63401b0544
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cu
@@ -0,0 +1 @@
+DenseMatrixExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cpp
similarity index 87%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cpp
index b521d15d26cc789c6f3f8b6c32ead723d35ac1f4..af9664411081a5b04411016c99840a7182018748 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -48,7 +48,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
 }
@@ -56,10 +56,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4ca91f19856e5b34f68fc64b75a7e007cfe19956
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cu
@@ -0,0 +1 @@
+DenseMatrixExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu
deleted file mode 120000
index 41bf46ebc4242f1d736981068acab56004a1786d..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu
deleted file mode 120000
index 61dd891255d95692787f5742566c2bcdc0872190..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cpp
similarity index 81%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cpp
index d2eae02e0e7bdaea7f14ab2fde18a58c790f171a..66b39413017b41b98b67eb2a3ff48455110cd8ec 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
    auto matrixView = matrix.getView();
@@ -16,17 +16,17 @@ void forEachElementExample()
          value = rowIdx + columnIdx;
    };
 
-   matrixView.forEachElement( f );
+   matrixView.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2485aa620ea750bc65c0369c9e2a1b986e13e687
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllElements.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cu
deleted file mode 120000
index 1094e7baad0436becb84b9721919b24e5c0ef164..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixViewExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forElements.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forElements.cpp
index cdc9fac58a49494a6218085d62a54a29dd0ba003..6a980d23c148599835882425db3a03475612ccef 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forElements.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forElements.cpp
@@ -10,7 +10,7 @@ void forElementsExample()
    auto matrixView = matrix.getView();
 
    auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) {
-      if( rowIdx < columnIdx )
+      if( columnIdx > rowIdx )
          compute = false;
       else
          value = rowIdx + columnIdx;
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..06d6c855383154ea1b6c38865d4db541b1ad10de
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
@@ -0,0 +1,53 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::DenseMatrix< double, Device >;
+   using RowView = typename MatrixType::RowView;
+   const int size = 5;
+   MatrixType matrix( size, size );
+   auto view = matrix.getView();
+
+   /***
+    * For row i: put i + 1 at position i and -1 at positions i - 1 and i + 1 (if in range).
+    */
+   auto fillRow = [=] __cuda_callable__ ( RowView& row ) mutable {
+      const int i = row.getRowIndex();
+      if( 0 < i )
+         row.setValue( i - 1, -1.0 );
+      row.setValue( i, i + 1.0 );
+      if( i + 1 < size )
+         row.setValue( i + 1, -1.0 );
+   };
+   view.forAllRows( fillRow );
+   std::cout << matrix << std::endl;
+
+   /***
+    * Divide every row by its largest entry, traversing the row with iterators.
+    */
+   view.forAllRows( [=] __cuda_callable__ ( RowView& row ) mutable {
+      double rowMax = std::numeric_limits< double >::lowest();
+      for( auto entry : row )
+         rowMax = TNL::max( rowMax, entry.value() );
+      for( auto entry : row )
+         entry.value() /= rowMax;
+   } );
+   std::cout << "Divide each matrix row by its largest element... " << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+   // The device variant below is compiled only when HAVE_CUDA is defined.
+#if defined( HAVE_CUDA )
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..8111505a3bafe0c6aaad3434405418d628efeb90
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
index 4572f41a38104aab7297d99a7e65920d351c7dfe..5c9e83f39335fdfcaee79449fc57f0dd3e106d0b 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp
@@ -6,19 +6,19 @@
 template< typename Device >
 void getCompressedRowLengthsExample()
 {
-   TNL::Matrices::DenseMatrix< double, Device > triangularMatrix {
+   TNL::Matrices::DenseMatrix< double, Device > denseMatrix {
       {  1 },
       {  2,  3 },
       {  4,  5,  6 },
       {  7,  8,  9, 10 },
       { 11, 12, 13, 14, 15 }
    };
-   auto triangularMatrixView = triangularMatrix.getConstView();
+   auto denseMatrixView = denseMatrix.getConstView();
 
-   std::cout << triangularMatrixView << std::endl;
+   std::cout << denseMatrixView << std::endl;
 
    TNL::Containers::Vector< int, Device > rowLengths;
-   triangularMatrixView.getCompressedRowLengths( rowLengths );
+   denseMatrixView.getCompressedRowLengths( rowLengths );
 
    std::cout << "Compressed row lengths are: " << rowLengths << std::endl;
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
index a0b9980242fe33c0c4a76e2b6f8dc549b85fa293..5fc1195ecb62974794778f4dc543c901c31e0a4a 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp
@@ -26,7 +26,7 @@ void getRowExample()
     */
    auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
       auto row = matrixView.getRow( rowIdx );
-      return row.getElement( rowIdx );
+      return row.getValue( rowIdx );
    };
 
    int trace = TNL::Algorithms::Reduction< Device >::reduce( 0, matrix.getRows(), fetch, std::plus<>{}, 0 );
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
index 8142599c14c0aa6f810f55ba3ff0d24e21c761de..0afd842178df4cf56b8f624cb5a5632f38d9cabd 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp
@@ -7,7 +7,8 @@
 template< typename Device >
 void getRowExample()
 {
-   TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 );
+   const int size = 5;
+   TNL::Matrices::DenseMatrix< double, Device > matrix( size, size );
 
    /***
     * Create dense matrix view which can be captured by the following lambda
@@ -17,7 +18,11 @@ void getRowExample()
 
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
       auto row = matrixView.getRow( rowIdx );
-      row.setElement( rowIdx, 10 * ( rowIdx + 1 ) );
+      if( rowIdx > 0 )
+         row.setValue( rowIdx - 1, -1.0 );
+      row.setValue( rowIdx, rowIdx + 1.0 );
+      if( rowIdx < size - 1 )
+         row.setValue( rowIdx + 1, -1.0 );
    };
 
    /***
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cpp
index b65cb3ea9d798184812aff48c15cf46d5f3321a0..5f008495066706a666b0a177fc173cb9a8ca3cc3 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -49,7 +49,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrixView.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrixView.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
 }
@@ -57,10 +57,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "All rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "All rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..952b099f76361bac1c7b881d59567c81c4e1f801
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cpp
index dbc44f85486191b7c09e9b5e38a9a9951a0bfaca..a6b46986c2959ed10e5c9c0c7e691626b26134ea 100644
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -49,7 +49,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
 }
@@ -57,10 +57,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..e55a80ebfb4612c7c49fcf50d84b5d87c7e264f0
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu
deleted file mode 120000
index 4170aaa628a8965768169b1da468517430143990..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt
index 49a39b7fb9a7eb86c90f5aa73a58cf49f577e0a2..f16a4e6972fae5d09b1b2013634c445f475c422c 100644
--- a/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt
@@ -1,90 +1,33 @@
-ADD_EXECUTABLE( LambdaMatrixExample_Constructor LambdaMatrixExample_Constructor.cpp )
-ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Constructor >
-                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Constructor.out
-                    OUTPUT LambdaMatrixExample_Constructor.out )
-
-ADD_EXECUTABLE( LambdaMatrixExample_getCompressedRowLengths LambdaMatrixExample_getCompressedRowLengths.cpp )
-ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getCompressedRowLengths >
-                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getCompressedRowLengths.out
-                    OUTPUT LambdaMatrixExample_getCompressedRowLengths.out )
+set( COMMON_EXAMPLES
+   LambdaMatrixExample_Constructor
+   LambdaMatrixExample_getCompressedRowLengths
+   LambdaMatrixExample_getNonzeroElementsCount
+   LambdaMatrixExample_Laplace
+   LambdaMatrixExample_Laplace_2
+   LambdaMatrixExample_getRow
+   LambdaMatrixExample_reduceRows
+   LambdaMatrixExample_reduceAllRows
+   LambdaMatrixExample_forElements
+   LambdaMatrixExample_forAllElements
+   LambdaMatrixExample_forRows
+)
 
-ADD_EXECUTABLE( LambdaMatrixExample_getNonzeroElementsCount LambdaMatrixExample_getNonzeroElementsCount.cpp )
-ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getNonzeroElementsCount >
-                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getNonzeroElementsCount.out
-                    OUTPUT LambdaMatrixExample_getNonzeroElementsCount.out )
+if( BUILD_CUDA )
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
+   endforeach()
+else()
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
+   endforeach()
+endif()
 
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_Laplace_cuda LambdaMatrixExample_Laplace.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace.out
-                     OUTPUT LambdaMatrixExample_Laplace.out )
-
-   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_Laplace_2_cuda LambdaMatrixExample_Laplace_2.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_2_cuda >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace_2.out
-                       OUTPUT LambdaMatrixExample_Laplace_2.out )
-
-                     CUDA_ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction_cuda LambdaMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
-                       OUTPUT LambdaMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction_cuda LambdaMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out
-                       OUTPUT LambdaMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forElements_cuda LambdaMatrixExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forElements.out
-                       OUTPUT LambdaMatrixExample_forElements.out )
-
-   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forEachElement_cuda LambdaMatrixExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forEachElement.out
-                       OUTPUT LambdaMatrixExample_forEachElement.out )
-
+   ADD_CUSTOM_TARGET( RunLambdaMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-   ADD_EXECUTABLE( LambdaMatrixExample_Laplace LambdaMatrixExample_Laplace.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace.out
-                       OUTPUT LambdaMatrixExample_Laplace.out )
-
-   ADD_EXECUTABLE( LambdaMatrixExample_Laplace_2 LambdaMatrixExample_Laplace_2.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_2 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace_2.out
-                        OUTPUT LambdaMatrixExample_Laplace_2.out )
-
-   ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction LambdaMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
-                       OUTPUT LambdaMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction LambdaMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out
-                       OUTPUT LambdaMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( LambdaMatrixExample_forElements LambdaMatrixExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forElements.out
-                       OUTPUT LambdaMatrixExample_forElements.out )
-
-   ADD_EXECUTABLE( LambdaMatrixExample_forEachElement LambdaMatrixExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forEachElement.out
-                       OUTPUT LambdaMatrixExample_forEachElement.out )
-ENDIF()
-
-ADD_CUSTOM_TARGET( RunLambdaMatricesExamples ALL DEPENDS
-   LambdaMatrixExample_Constructor.out
-   LambdaMatrixExample_Laplace.out
-   LambdaMatrixExample_Laplace_2.out
-   LambdaMatrixExample_getCompressedRowLengths.out
-   LambdaMatrixExample_getNonzeroElementsCount.out
-   LambdaMatrixExample_rowsReduction.out
-   LambdaMatrixExample_allRowsReduction.out
-   LambdaMatrixExample_forElements.out
-   LambdaMatrixExample_forEachElement.out
-)
-
+   ADD_CUSTOM_TARGET( RunLambdaMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
+ENDIF()
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cu
new file mode 120000
index 0000000000000000000000000000000000000000..b191c4ff120fc6a421db10e41eaa996dc4b062e5
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_Constructor.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu
deleted file mode 120000
index f76fba15c56a9d0c0f26f605c6745b2e3cd28da3..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-LambdaMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cpp
similarity index 89%
rename from Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cpp
index 282dae100f0bff38c19347319bf43128e631200f..293f173d2a0115ea46b03ac09018e399ad7c99f1 100644
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    /***
     * Lambda functions defining the matrix.
@@ -26,7 +26,7 @@ void forEachElementExample()
       denseView.setElement( rowIdx, columnIdx, value );
    };
 
-   matrix.forEachElement( f );
+   matrix.forAllElements( f );
    std::cout << "Original lambda matrix:" << std::endl << matrix << std::endl;
    std::cout << "Dense matrix:" << std::endl << denseMatrix << std::endl;
 }
@@ -34,10 +34,10 @@ void forEachElementExample()
 int main( int argc, char* argv[] )
 {
    std::cout << "Copying matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Copying matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..493efb523b88e561c98b38310007992d6646c02c
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllElements.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cu
deleted file mode 120000
index 0b12a40daa3a695d9534b2552db7b3714daa2da5..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-LambdaMatrixExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f8d826710cee0cdee10f01e09feb46f1e727c721
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
@@ -0,0 +1,104 @@
+#include <iostream>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set up a lambda matrix approximating the Laplace operator on a regular
+    * grid using the finite difference method; there is one matrix row per grid node.
+    */
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize;
+   auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int
+   {  // returns the length of row rowIdx
+      const int gridRow = rowIdx / gridSize;                  // coordinates in the numerical grid
+      const int gridColumn = rowIdx % gridSize;               // (grid nodes are numbered row by row)
+      if( gridRow == 0 || gridRow == gridSize - 1 ||          // boundary grid node
+          gridColumn == 0 || gridColumn == gridSize - 1 )
+          return 1;                                           // boundary row: only the diagonal element
+      return 5;                                               // interior row: diagonal element and four neighbours
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) {
+      const int gridRow = rowIdx / gridSize;                  // coordinates in the numerical grid
+      const int gridColumn = rowIdx % gridSize;               // (grid nodes are numbered row by row)
+      if( gridRow == 0 || gridRow == gridSize - 1 ||          // boundary grid node
+          gridColumn == 0 || gridColumn == gridSize - 1 )
+         {
+            columnIdx = rowIdx;                               // diagonal element ....
+            value = 1.0;                                      // ... is set to 1
+         }
+         else                                                 // interior grid node
+         {
+            switch( localIdx )                                // set diagonal element to -4
+            {                                                 // and the four neighbours to 1
+               case 0:
+                  columnIdx = rowIdx - gridSize;              // neighbour in the previous grid row
+                  value = 1;
+                  break;
+               case 1:
+                  columnIdx = rowIdx - 1;                     // neighbour in the previous grid column
+                  value = 1;
+                  break;
+               case 2:
+                  columnIdx = rowIdx;                         // the grid node itself (diagonal element)
+                  value = -4;
+                  break;
+               case 3:
+                  columnIdx = rowIdx + 1;                     // neighbour in the next grid column
+                  value = 1;
+                  break;
+               case 4:
+                  columnIdx = rowIdx + gridSize;              // neighbour in the next grid row
+                  value = 1;
+                  break;
+            }
+         }
+   };
+   auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create(
+      matrixSize, matrixSize, matrixElements, rowLengths );
+   using MatrixType = decltype( matrix );
+   using RowView = typename MatrixType::RowView;              // row type handed to the forAllRows callbacks below
+
+   TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize );
+   denseMatrix.setValue( 0.0 );
+   auto dense_view = denseMatrix.getView();                   // view captured by value in the lambda below
+   auto f = [=] __cuda_callable__ ( const RowView& row ) mutable {   // copies one lambda matrix row into the dense matrix
+      auto dense_row = dense_view.getRow( row.getRowIndex() );
+      for( int localIdx = 0; localIdx < row.getSize(); localIdx++ )
+         dense_row.setValue( row.getColumnIndex( localIdx ), row.getValue( localIdx ) );
+   };
+   matrix.forAllRows( f );
+
+   std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl;
+
+   /***
+    * Sum the absolute values of the elements in each row and store the sums into a vector.
+    */
+   TNL::Containers::Vector< double, Device > sum_vector( matrixSize );
+   auto sum_view = sum_vector.getView();                      // view captured by value in the lambda below
+   matrix.forAllRows( [=] __cuda_callable__ ( const RowView& row ) mutable {
+      double sum( 0.0 );
+      for( auto element : row )
+         sum += TNL::abs( element.value() );
+      sum_view[ row.getRowIndex() ] = sum;                    // one result per matrix row
+   } );
+
+   std::cout << "Sums in matrix rows = " << sum_vector << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs the example on the host and, if HAVE_CUDA is defined, on the CUDA device
+{
+   std::cout << "Running example on CPU ... " << std::endl;
+   forRowsExample< TNL::Devices::Host >();   // example instantiated for the host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Running example on CUDA GPU ... " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();   // same example instantiated for the CUDA device
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6df275619c15af4f43617de7d068083cf4028590
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2427f3fb96beee5a7c766e3c4155d6543837e986
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_getNonzeroElementsCount.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e5e369089c968b86ce4f50f6513115d8b4943453
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp
@@ -0,0 +1,90 @@
+#include <iostream>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Matrices/DenseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+
+template< typename Device >
+void getRowExample()
+{
+   /***
+    * Set up a lambda matrix approximating the Laplace operator on a regular
+    * grid using the finite difference method; there is one matrix row per grid node.
+    */
+   const int gridSize( 4 );
+   const int matrixSize = gridSize * gridSize;
+   auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int
+   {  // returns the length of row rowIdx
+      const int gridRow = rowIdx / gridSize;                  // coordinates in the numerical grid
+      const int gridColumn = rowIdx % gridSize;               // (grid nodes are numbered row by row)
+      if( gridRow == 0 || gridRow == gridSize - 1 ||          // boundary grid node
+          gridColumn == 0 || gridColumn == gridSize - 1 )
+          return 1;                                           // boundary row: only the diagonal element
+      return 5;                                               // interior row: diagonal element and four neighbours
+   };
+   auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value) {
+      const int gridRow = rowIdx / gridSize;                  // coordinates in the numerical grid
+      const int gridColumn = rowIdx % gridSize;               // (grid nodes are numbered row by row)
+      if( gridRow == 0 || gridRow == gridSize - 1 ||          // boundary grid node
+          gridColumn == 0 || gridColumn == gridSize - 1 )
+         {
+            columnIdx = rowIdx;                               // diagonal element ....
+            value = 1.0;                                      // ... is set to 1
+         }
+         else                                                 // interior grid node
+         {
+            switch( localIdx )                                // set diagonal element to -4
+            {                                                 // and the four neighbours to 1
+               case 0:
+                  columnIdx = rowIdx - gridSize;              // neighbour in the previous grid row
+                  value = 1;
+                  break;
+               case 1:
+                  columnIdx = rowIdx - 1;                     // neighbour in the previous grid column
+                  value = 1;
+                  break;
+               case 2:
+                  columnIdx = rowIdx;                         // the grid node itself (diagonal element)
+                  value = -4;
+                  break;
+               case 3:
+                  columnIdx = rowIdx + 1;                     // neighbour in the next grid column
+                  value = 1;
+                  break;
+               case 4:
+                  columnIdx = rowIdx + gridSize;              // neighbour in the next grid row
+                  value = 1;
+                  break;
+            }
+         }
+   };
+   auto matrix = TNL::Matrices::LambdaMatrixFactory< double, Device, int >::create(
+      matrixSize, matrixSize, matrixElements, rowLengths );
+
+   // Dense matrix of the same size that receives a copy of the lambda matrix.
+   TNL::Matrices::DenseMatrix< double, Device > denseMatrix( matrixSize, matrixSize );
+   denseMatrix.setValue( 0.0 );
+   auto dense_view = denseMatrix.getView();                   // view captured by value in the lambda below
+   auto f = [=] __cuda_callable__ ( const int& rowIdx ) mutable {   // copies row rowIdx of the lambda matrix into the dense matrix
+      auto row = matrix.getRow( rowIdx );
+      auto dense_row = dense_view.getRow( rowIdx );
+      for( int localIdx = 0; localIdx < row.getSize(); localIdx++ )
+         dense_row.setValue( row.getColumnIndex( localIdx ), row.getValue( localIdx ) );
+   };
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f );   // run f over the row indexes, starting at 0 and bounded by matrixSize
+
+   std::cout << "Laplace operator lambda matrix: " << std::endl << matrix << std::endl;
+   std::cout << "Laplace operator dense matrix: " << std::endl << denseMatrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // runs the example on the host and, if HAVE_CUDA is defined, on the CUDA device
+{
+   std::cout << "Running example on CPU ... " << std::endl;
+   getRowExample< TNL::Devices::Host >();   // example instantiated for the host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Running example on CUDA GPU ... " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();   // same example instantiated for the CUDA device
+#endif
+}
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..1794dfc1f69f1bcfcc4bfd1fca0ddcd2db13bd11
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cpp
similarity index 90%
rename from Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cpp
index a39a1f33b339aeeb9420612009bcd0080ffafb44..b90e28f86273a31f94e3a9d79f5b2d1acb8941ab 100644
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    /***
     * Lambda functions defining the matrix.
@@ -54,7 +54,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -63,10 +63,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "All rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();   // run the example instantiated for the host device
 
 #ifdef HAVE_CUDA
    std::cout << "All rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();   // same example for the CUDA device; compiled only when HAVE_CUDA is defined
 #endif
 }
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..d2ded9d9fce5825e12955f760dde2c548bba30e8
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cpp
similarity index 90%
rename from Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cpp
index 4cb0aedab1f684f9196e6c9e56439cb5b195d452..847edbca79756f0ef996befb46f93fd7efa7c26f 100644
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Lambda functions defining the matrix.
@@ -54,7 +54,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -63,10 +63,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();   // run the example instantiated for the host device
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();   // same example for the CUDA device; compiled only when HAVE_CUDA is defined
 #endif
 }
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..50cb7e925d1166764ce302e940a5a356d2c1e337
--- /dev/null
+++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cu
@@ -0,0 +1 @@
+LambdaMatrixExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu
deleted file mode 120000
index ecb2401ec9f6a98146b2320d6e6d21de4580bd07..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-LambdaMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt
index ded692be2f17c0242e630b1d8a3b0d07704edeb3..78ff95236d8680d8ba882ba835f470cb45f1ca90 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt
@@ -1,290 +1,53 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_cuda MultidiagonalMatrixExample_Constructor.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_1_cuda MultidiagonalMatrixExample_Constructor_init_list_1.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_1_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_1.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor_init_list_1.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_2_cuda MultidiagonalMatrixExample_Constructor_init_list_2.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_2_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_2.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor_init_list_2.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getSerializationType_cuda MultidiagonalMatrixExample_getSerializationType.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getSerializationType_cuda > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getSerializationType.out
-                       OUTPUT MultidiagonalMatrixExample_getSerializationType.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_setElements_cuda MultidiagonalMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElements_cuda > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElements.out
-                       OUTPUT MultidiagonalMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getCompressedRowLengths_cuda MultidiagonalMatrixExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getCompressedRowLengths.out
-                       OUTPUT MultidiagonalMatrixExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getConstRow_cuda MultidiagonalMatrixExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getConstRow.out
-                       OUTPUT MultidiagonalMatrixExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getRow_cuda MultidiagonalMatrixExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getRow.out
-                       OUTPUT MultidiagonalMatrixExample_getRow.out )
-
+set( COMMON_EXAMPLES
+   MultidiagonalMatrixExample_Constructor
+    MultidiagonalMatrixExample_Constructor_init_list_1
+    MultidiagonalMatrixExample_Constructor_init_list_2
+    MultidiagonalMatrixExample_getSerializationType
+    MultidiagonalMatrixExample_setElements
+    MultidiagonalMatrixExample_getCompressedRowLengths
+#    MultidiagonalMatrixExample_getElementsCount
+    MultidiagonalMatrixExample_getConstRow
+    MultidiagonalMatrixExample_getRow
 # This example does not work with nvcc 10.1. Restore it here when it works.
-#   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement_cuda MultidiagonalMatrixExample_setElement.cu )
-#   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement_cuda >
-#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out
-#                       OUTPUT MultidiagonalMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_addElement_cuda MultidiagonalMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_addElement.out
-                       OUTPUT MultidiagonalMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getElement_cuda MultidiagonalMatrixExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getElement.out
-                       OUTPUT MultidiagonalMatrixExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_rowsReduction_cuda MultidiagonalMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_rowsReduction.out
-                       OUTPUT MultidiagonalMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_allRowsReduction_cuda MultidiagonalMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_allRowsReduction.out
-                       OUTPUT MultidiagonalMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_forElements_cuda MultidiagonalMatrixExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forElements.out
-                       OUTPUT MultidiagonalMatrixExample_forElements.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_forEachElement_cuda MultidiagonalMatrixExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forEachElement.out
-                       OUTPUT MultidiagonalMatrixExample_forEachElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getCompressedRowLengths_cuda MultidiagonalMatrixViewExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT MultidiagonalMatrixViewExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getConstRow_cuda MultidiagonalMatrixViewExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getConstRow.out
-                       OUTPUT MultidiagonalMatrixViewExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getRow_cuda MultidiagonalMatrixViewExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getRow.out
-                       OUTPUT MultidiagonalMatrixViewExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_setElement_cuda MultidiagonalMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_setElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_addElement_cuda MultidiagonalMatrixViewExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_addElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getElement_cuda MultidiagonalMatrixViewExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getElement_cuda >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_rowsReduction_cuda MultidiagonalMatrixViewExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_rowsReduction.out
-                       OUTPUT MultidiagonalMatrixViewExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_allRowsReduction_cuda MultidiagonalMatrixViewExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_allRowsReduction.out
-                       OUTPUT MultidiagonalMatrixViewExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forElements_cuda MultidiagonalMatrixViewExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forElements.out
-                       OUTPUT MultidiagonalMatrixViewExample_forElements.out )
+#    MultidiagonalMatrixExample_setElement
+    MultidiagonalMatrixExample_addElement
+    MultidiagonalMatrixExample_getElement
+    MultidiagonalMatrixExample_reduceRows
+    MultidiagonalMatrixExample_reduceAllRows
+    MultidiagonalMatrixExample_forElements
+    MultidiagonalMatrixExample_forAllElements
+    MultidiagonalMatrixExample_forRows
+#    MultidiagonalMatrixViewExample_constructor
+    MultidiagonalMatrixViewExample_getCompressedRowLengths
+#    MultidiagonalMatrixViewExample_getElementsCount
+    MultidiagonalMatrixViewExample_getConstRow
+    MultidiagonalMatrixViewExample_getRow
+    MultidiagonalMatrixViewExample_setElement
+    MultidiagonalMatrixViewExample_addElement
+    MultidiagonalMatrixViewExample_getElement
+    MultidiagonalMatrixViewExample_reduceRows
+    MultidiagonalMatrixViewExample_reduceAllRows
+    MultidiagonalMatrixViewExample_forElements
+    MultidiagonalMatrixViewExample_forAllElements
+    MultidiagonalMatrixViewExample_forRows
+)
 
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forEachElement_cuda MultidiagonalMatrixViewExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forEachElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_forEachElement.out )
+if( BUILD_CUDA )  # build every example (.cu with CUDA, .cpp otherwise) and redirect its output to a documentation snippet
+   foreach( example ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${example}-cuda ${example}.cu OPTIONS )
+      add_custom_command( OUTPUT ${example}.out COMMAND ${example}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out )
+      list( APPEND CUDA_OUTPUTS ${example}.out )  # collected outputs of the CUDA builds
+   endforeach()
+else()
+   foreach( example ${COMMON_EXAMPLES} )
+      add_executable( ${example} ${example}.cpp )
+      add_custom_command( OUTPUT ${example}.out COMMAND ${example} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out )
+      list( APPEND HOST_OUTPUTS ${example}.out )  # collected outputs of the host-only builds
+   endforeach()
+endif()
 
+IF( BUILD_CUDA )
+   ADD_CUSTOM_TARGET( RunMultidiagonalMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor MultidiagonalMatrixExample_Constructor.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_1 MultidiagonalMatrixExample_Constructor_init_list_1.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_1 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_1.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor_init_list_1.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_2 MultidiagonalMatrixExample_Constructor_init_list_2.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_2 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_2.out
-                       OUTPUT MultidiagonalMatrixExample_Constructor_init_list_2.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_getSerializationType MultidiagonalMatrixExample_getSerializationType.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getSerializationType >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getSerializationType.out
-                       OUTPUT MultidiagonalMatrixExample_getSerializationType.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_setElements MultidiagonalMatrixExample_setElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElements.out
-                       OUTPUT MultidiagonalMatrixExample_setElements.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_getCompressedRowLengths MultidiagonalMatrixExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getCompressedRowLengths.out
-                       OUTPUT MultidiagonalMatrixExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_getConstRow MultidiagonalMatrixExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getConstRow.out
-                       OUTPUT MultidiagonalMatrixExample_getConstRow.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_getRow MultidiagonalMatrixExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getRow.out
-                       OUTPUT MultidiagonalMatrixExample_getRow.out )
-
-#  This example does not work with nvcc 10.1. Restore it here when it works.
-#   ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement MultidiagonalMatrixExample_setElement.cpp )
-#   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement >
-#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out
-#                       OUTPUT MultidiagonalMatrixExample_setElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_addElement MultidiagonalMatrixExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_addElement.out
-                       OUTPUT MultidiagonalMatrixExample_addElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_getElement MultidiagonalMatrixExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getElement.out
-                       OUTPUT MultidiagonalMatrixExample_getElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_rowsReduction MultidiagonalMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_rowsReduction.out
-                       OUTPUT MultidiagonalMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_allRowsReduction MultidiagonalMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_allRowsReduction.out
-                       OUTPUT MultidiagonalMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_forElements MultidiagonalMatrixExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forElements.out
-                       OUTPUT MultidiagonalMatrixExample_forElements.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_forEachElement MultidiagonalMatrixExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forEachElement.out
-                       OUTPUT MultidiagonalMatrixExample_forEachElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getCompressedRowLengths MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT MultidiagonalMatrixViewExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getConstRow MultidiagonalMatrixViewExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getConstRow.out
-                       OUTPUT MultidiagonalMatrixViewExample_getConstRow.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getRow MultidiagonalMatrixViewExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getRow.out
-                       OUTPUT MultidiagonalMatrixViewExample_getRow.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_setElement MultidiagonalMatrixViewExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_setElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_setElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_addElement MultidiagonalMatrixViewExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_addElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_addElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_addElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getElement MultidiagonalMatrixViewExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_getElement.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_rowsReduction MultidiagonalMatrixViewExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_rowsReduction >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_rowsReduction.out
-                       OUTPUT MultidiagonalMatrixViewExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_allRowsReduction MultidiagonalMatrixViewExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_allRowsReduction >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_allRowsReduction.out
-                       OUTPUT MultidiagonalMatrixViewExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forElements MultidiagonalMatrixViewExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forElements.out
-                       OUTPUT MultidiagonalMatrixViewExample_forElements.out )
-
-   ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forEachElement MultidiagonalMatrixViewExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forEachElement.out
-                       OUTPUT MultidiagonalMatrixViewExample_forEachElement.out )
-
-ENDIF()
-
-   ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement MultidiagonalMatrixExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out
-                       OUTPUT MultidiagonalMatrixExample_setElement.out )
-
-
-ADD_CUSTOM_TARGET( RunMultidiagonalMatricesExamples ALL DEPENDS
-   MultidiagonalMatrixExample_Constructor.out
-   MultidiagonalMatrixExample_Constructor_init_list_1.out
-   MultidiagonalMatrixExample_Constructor_init_list_2.out
-   MultidiagonalMatrixExample_getSerializationType.out
-   MultidiagonalMatrixExample_setElements.out
-   MultidiagonalMatrixExample_getCompressedRowLengths.out
-   MultidiagonalMatrixExample_getConstRow.out
-   MultidiagonalMatrixExample_getRow.out
-   MultidiagonalMatrixExample_setElement.out
-   MultidiagonalMatrixExample_addElement.out
-   MultidiagonalMatrixExample_getElement.out
-   MultidiagonalMatrixExample_rowsReduction.out
-   MultidiagonalMatrixExample_allRowsReduction.out
-   MultidiagonalMatrixExample_forElements.out
-   MultidiagonalMatrixExample_forEachElement.out
-   MultidiagonalMatrixViewExample_getCompressedRowLengths.out
-   MultidiagonalMatrixViewExample_getConstRow.out
-   MultidiagonalMatrixViewExample_getRow.out
-   MultidiagonalMatrixViewExample_setElement.out
-   MultidiagonalMatrixViewExample_addElement.out
-   MultidiagonalMatrixViewExample_getElement.out
-   MultidiagonalMatrixViewExample_rowsReduction.out
-   MultidiagonalMatrixViewExample_allRowsReduction.out
-   MultidiagonalMatrixViewExample_forElements.out
-   MultidiagonalMatrixViewExample_forEachElement.out
-)
-
+   ADD_CUSTOM_TARGET( RunMultidiagonalMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
+ENDIF()
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu
deleted file mode 120000
index c8659a5f4bc549e90cc8e84a26f41eb5a1d74a2e..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cpp
similarity index 91%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cpp
index a3af45733b16e139ff5629685e63fee68524024d..b29543d9e0cb88da80f5ed0f9381d7be772c3f48 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
@@ -40,17 +40,17 @@ void forEachElementExample()
        */
       value = 3 - localIdx;
    };
-   matrix.forEachElement( f );
+   matrix.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4de1d606d3c717542e844e8a0532bac1e38c388e
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllElements.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cu
deleted file mode 120000
index 758a054ffa0b00b51ca98abaa4a3994dafc6df6d..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ff377445c74041bb3e563c8bfe62de78668277a
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
@@ -0,0 +1,65 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   using RowView = typename MatrixType::RowView;
+   /***
+    * Set the following matrix row by row using forAllRows (dots represent zero
+    * matrix elements and zeros are padding zeros for memory alignment):
+    *
+    *    0 /  2 -1  .  .  . \      -> {  0,  2, -1 }
+    *      | -1  2 -1  .  . |      -> { -1,  2, -1 }
+    *      |  . -1  2 -1  . |      -> { -1,  2, -1 }
+    *      |  .  . -1  2 -1 |      -> { -1,  2, -1 }
+    *      \  .  .  . -1  2 / 0    -> { -1,  2,  0 }
+    *
+    * The diagonal offsets are { -1, 0, 1 }.
+    */
+   const int size = 5;
+   MatrixType matrix(
+      size,           // number of matrix rows
+      size,           // number of matrix columns
+      { -1, 0, 1 } ); // matrix diagonals offsets
+
+   // Local element indexes 0, 1, 2 in a row follow the order of the diagonal offsets.
+   auto f = [=] __cuda_callable__ ( RowView& row ) {
+      const int& rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < size - 1 )
+         row.setElement( 2, -1.0 );  // elements above the diagonal
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+
+   /***
+    * Compute the sum of absolute values of the elements in each row and store it into a vector.
+    */
+   TNL::Containers::Vector< double, Device > sum_vector( size );
+   auto sum_view = sum_vector.getView();
+   matrix.forAllRows( [=] __cuda_callable__ ( RowView& row ) mutable {
+      double sum( 0.0 );
+      for( auto element : row )
+         sum += TNL::abs( element.value() );
+      sum_view[ row.getRowIndex() ] = sum;
+   } );
+
+   std::cout << "Sums in matrix rows = " << sum_vector << std::endl;
+}
+
+int main( int argc, char* argv[] )   // argc and argv are not used by this example
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();   // run the example with the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();   // compiled in only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..aff0dad0c51477bd34514e1e58420365f75faea5
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
index 856848bd3acb989e5cbcd8c3e66baf0f1f934009..24c4d574c2a46d8960abb8e2bd574b64b14fbac4 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
@@ -19,8 +19,8 @@ void getRowExample()
       diagonalsOffsets );
 
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
-      //auto row = matrix->getRow( rowIdx );    
-      // For some reason the previous line of code is not accepted by nvcc 10.1 
+      //auto row = matrix->getRow( rowIdx );
+      // For some reason the previous line of code is not accepted by nvcc 10.1
       // so we replace it with the following two lines.
       auto ref = matrix.modifyData();
       auto row = ref.getRow( rowIdx );
@@ -52,7 +52,7 @@ int main( int argc, char* argv[] )
    getRowExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
-   // It seems that nvcc 10.1 does not handle lambda functions properly. 
+   // It seems that nvcc 10.1 does not handle lambda functions properly.
    // It is hard to make nvcc to compile this example and it does not work
    // properly. We will try it with later version of CUDA.
    //std::cout << "Getting matrix rows on CUDA device: " << std::endl;
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cpp
index 4b102c73b680ca9f9c0c63f315b81c91fb24dbbf..1fedc197d9ccc7d120cf2103c7035deb094c6286 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cpp
@@ -5,26 +5,26 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
     *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
     *      | 3  2  1  .  . |  -> { 3, 2, 1 }
     *      | .  3  2  1  . |  -> { 3, 2, 1 }
-    *      \ .  .  3  2  1 /  -> { 3, 2, 1 } 
-    * 
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
     * The diagonals offsets are { -2, -1, 0 }.
     */
    TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { -2, -1, 0 },  // diagonals offsets
       { { 0, 0, 1 },  // matrix elements
-        { 0, 2, 1 }, 
-        { 3, 2, 1 }, 
+        { 0, 2, 1 },
+        { 3, 2, 1 },
         { 3, 2, 1 },
         { 3, 2, 1 } } );
 
@@ -62,7 +62,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -71,10 +71,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..cac3890f406353e337761f85df0fd7fa2f588f1c
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cpp
index 2b579d96372fff132b063ecd166536fcd3f57f10..8e895dc0d85c814991bd9a82d9b1ecee0cbb0885 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cpp
@@ -5,26 +5,26 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
     *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
     *      | 3  2  1  .  . |  -> { 3, 2, 1 }
     *      | .  3  2  1  . |  -> { 3, 2, 1 }
-    *      \ .  .  3  2  1 /  -> { 3, 2, 1 } 
-    * 
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
     * The diagonals offsets are { -2, -1, 0 }.
     */
    TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { -2, -1, 0 },  // diagonals offsets
       { { 0, 0, 1 },  // matrix elements
-        { 0, 2, 1 }, 
-        { 3, 2, 1 }, 
+        { 0, 2, 1 },
+        { 3, 2, 1 },
         { 3, 2, 1 },
         { 3, 2, 1 } } );
 
@@ -62,7 +62,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -71,10 +71,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7c5170e06e3957f6bfeb737168f9de31545052a9
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu
deleted file mode 120000
index adaff28e8a549eeb8b5539535f78e5cc3594f698..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu
deleted file mode 120000
index 5e14692de8159377e123ed8fec43cd750143b986..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cpp
similarity index 92%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cpp
index 92c9ee9e6226e1e38cad981480278a4970b4a313..b05da1d8213143bc5ffd11625f3580c9185248e7 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
@@ -41,17 +41,17 @@ void forEachElementExample()
        */
       value = 3 - localIdx;
    };
-   view.forEachElement( f );
+   view.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f8872fb2750e458d03052b5924ac398c296933cf
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllElements.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cu
deleted file mode 120000
index 140f4ccf9f93551bef16027a2391327c6cd4b810..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixViewExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8fcb6c3600959503c44fc60a2b49944375dadb28
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
@@ -0,0 +1,50 @@
+#include <iostream>
+#include <TNL/Matrices/MultidiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >;
+   /***
+    * Set the following matrix through its view (dots represent zero matrix
+    * elements and zeros are padding zeros for memory alignment):
+    *
+    *    0 /  2 -1  .  .  . \      -> {  0,  2, -1 }
+    *      | -1  2 -1  .  . |      -> { -1,  2, -1 }
+    *      |  . -1  2 -1  . |      -> { -1,  2, -1 }
+    *      |  .  . -1  2 -1 |      -> { -1,  2, -1 }
+    *      \  .  .  . -1  2 / 0    -> { -1,  2,  0 }
+    *
+    * The diagonal offsets are { -1, 0, 1 }.
+    */
+    const int size = 5;
+    MatrixType matrix(
+      size,            // number of matrix rows
+      size,            // number of matrix columns
+      { -1, 0, 1 } ); // matrix diagonals offsets
+   auto view = matrix.getView();
+
+   auto f = [=] __cuda_callable__ ( typename MatrixType::RowView& row ) {
+      const int& rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < size - 1 )        // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+   view.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // argc and argv are not used by this example
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();   // run the example with the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();   // compiled in only when HAVE_CUDA is defined
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ec3f1ad70dabbf5aaf7ac170a72b10868d18df6e
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cpp
index 26dac464cea68636fb61458ab2e86f3ca153ed56..c235f44780e9c5d5443aa196d4b1e0f3ae11d711 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cpp
@@ -5,26 +5,26 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
     *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
     *      | 3  2  1  .  . |  -> { 3, 2, 1 }
     *      | .  3  2  1  . |  -> { 3, 2, 1 }
-    *      \ .  .  3  2  1 /  -> { 3, 2, 1 } 
-    * 
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
     * The diagonals offsets are { -2, -1, 0 }.
     */
    TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { -2, -1, 0 },  // diagonals offsets
       { { 0, 0, 1 },  // matrix elements
-        { 0, 2, 1 }, 
-        { 3, 2, 1 }, 
+        { 0, 2, 1 },
+        { 3, 2, 1 },
         { 3, 2, 1 },
         { 3, 2, 1 } } );
    auto view = matrix.getView();
@@ -63,7 +63,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   view.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -72,10 +72,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..32612320889fa6ce22a6dcc72b4fc5d2610b64fa
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cpp
index 6793d8bb29969118bd8bcce954d09364db730618..f6dee85f648d2f9f71cb7f63608dfa44cc432511 100644
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cpp
@@ -5,26 +5,26 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     * 0  0 / 1  .  .  .  . \  -> { 0, 0, 1 }
     *    0 | 2  1  .  .  . |  -> { 0, 2, 1 }
     *      | 3  2  1  .  . |  -> { 3, 2, 1 }
     *      | .  3  2  1  . |  -> { 3, 2, 1 }
-    *      \ .  .  3  2  1 /  -> { 3, 2, 1 } 
-    * 
+    *      \ .  .  3  2  1 /  -> { 3, 2, 1 }
+    *
     * The diagonals offsets are { -2, -1, 0 }.
     */
    TNL::Matrices::MultidiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { -2, -1, 0 },  // diagonals offsets
       { { 0, 0, 1 },  // matrix elements
-        { 0, 2, 1 }, 
-        { 3, 2, 1 }, 
+        { 0, 2, 1 },
+        { 3, 2, 1 },
         { 3, 2, 1 },
         { 3, 2, 1 } } );
    auto view = matrix.getView();
@@ -63,7 +63,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   view.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   view.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -72,10 +72,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..1ebbea34c787e67ae85e14ebf463c1ae7a98dfbf
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu
deleted file mode 120000
index 44df3d3ed38482b4495dcf4aa49f24fc09955650..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-MultidiagonalMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
index f0f62f49e779b8f2feda5f85f5cfbe16b2200ea3..c2db3879eea91791f4ee5239e3cb1acda1092702 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
@@ -1,326 +1,53 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_init_list_1_cuda SparseMatrixExample_Constructor_init_list_1.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_init_list_1_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_init_list_1.out
-                       OUTPUT SparseMatrixExample_Constructor_init_list_1.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_init_list_2_cuda SparseMatrixExample_Constructor_init_list_2.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_init_list_2_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_init_list_2.out
-                       OUTPUT SparseMatrixExample_Constructor_init_list_2.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_rowCapacities_vector_cuda SparseMatrixExample_Constructor_rowCapacities_vector.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_rowCapacities_vector_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_rowCapacities_vector.out
-                       OUTPUT SparseMatrixExample_Constructor_rowCapacities_vector.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_std_map_cuda SparseMatrixExample_Constructor_std_map.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_std_map_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_std_map.out
-                       OUTPUT SparseMatrixExample_Constructor_std_map.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_getSerializationType_cuda SparseMatrixExample_getSerializationType.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getSerializationType_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getSerializationType.out
-                       OUTPUT SparseMatrixExample_getSerializationType.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setRowCapacities_cuda SparseMatrixExample_setRowCapacities.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setRowCapacities_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setRowCapacities.out
-                       OUTPUT SparseMatrixExample_setRowCapacities.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElements_cuda SparseMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements.out
-                       OUTPUT SparseMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElements_map_cuda SparseMatrixExample_setElements_map.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements_map_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements_map.out
-                       OUTPUT SparseMatrixExample_setElements_map.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_getCompressedRowLengths_cuda SparseMatrixExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT SparseMatrixExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_getConstRow_cuda SparseMatrixExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getConstRow.out
-                       OUTPUT SparseMatrixExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_getRow_cuda SparseMatrixExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getRow.out
-                       OUTPUT SparseMatrixExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElement_cuda SparseMatrixExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElement.out
-                       OUTPUT SparseMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_addElement_cuda SparseMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_addElement.out
-                       OUTPUT SparseMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_getElement_cuda SparseMatrixExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getElement.out
-                       OUTPUT SparseMatrixExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_rowsReduction_cuda SparseMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_rowsReduction.out
-                       OUTPUT SparseMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_allRowsReduction_cuda SparseMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_allRowsReduction.out
-                       OUTPUT SparseMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_forElements_cuda SparseMatrixExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_forElements.out
-                       OUTPUT SparseMatrixExample_forElements.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_forEachElement_cuda SparseMatrixExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_forEachElement.out
-                       OUTPUT SparseMatrixExample_forEachElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_getSerializationType_cuda SparseMatrixViewExample_getSerializationType.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getSerializationType_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getSerializationType.out
-                       OUTPUT SparseMatrixViewExample_getSerializationType.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_getCompressedRowLengths_cuda SparseMatrixViewExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT SparseMatrixViewExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_getConstRow_cuda SparseMatrixViewExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getConstRow.out
-                       OUTPUT SparseMatrixViewExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_getRow_cuda SparseMatrixViewExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getRow.out
-                       OUTPUT SparseMatrixViewExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_setElement_cuda SparseMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out
-                       OUTPUT SparseMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_addElement_cuda SparseMatrixViewExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_addElement_cuda >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_addElement.out
-                       OUTPUT SparseMatrixViewExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_getElement_cuda SparseMatrixViewExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getElement.out
-                       OUTPUT SparseMatrixViewExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_rowsReduction_cuda SparseMatrixViewExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_rowsReduction.out
-                       OUTPUT SparseMatrixViewExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_allRowsReduction_cuda SparseMatrixViewExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_allRowsReduction.out
-                       OUTPUT SparseMatrixViewExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_forElements_cuda SparseMatrixViewExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_forElements.out
-                       OUTPUT SparseMatrixViewExample_forElements.out )
+set( COMMON_EXAMPLES
+   SparseMatrixExample_Constructor_init_list_1
+   SparseMatrixExample_Constructor_init_list_2
+   SparseMatrixExample_Constructor_rowCapacities_vector
+   SparseMatrixExample_Constructor_std_map
+   SparseMatrixExample_getSerializationType
+   SparseMatrixExample_setRowCapacities
+   SparseMatrixExample_setElements
+   SparseMatrixExample_setElements_map
+   SparseMatrixExample_getCompressedRowLengths
+   SparseMatrixExample_getConstRow
+   SparseMatrixExample_getRow
+   SparseMatrixExample_setElement
+   SparseMatrixExample_addElement
+   SparseMatrixExample_getElement
+   SparseMatrixExample_reduceRows
+   SparseMatrixExample_reduceAllRows
+   SparseMatrixExample_forElements
+   SparseMatrixExample_forAllElements
+   SparseMatrixExample_forRows
+   SparseMatrixViewExample_getSerializationType
+   SparseMatrixViewExample_getCompressedRowLengths
+   SparseMatrixViewExample_getConstRow
+   SparseMatrixViewExample_getRow
+   SparseMatrixViewExample_setElement
+   SparseMatrixViewExample_addElement
+   SparseMatrixViewExample_getElement
+   SparseMatrixViewExample_reduceRows
+   SparseMatrixViewExample_reduceAllRows
+   SparseMatrixViewExample_forElements
+   SparseMatrixViewExample_forRows
+   SparseMatrixViewExample_forAllElements
+)
 
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_forEachElement_cuda SparseMatrixViewExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_forEachElement.out
-                       OUTPUT SparseMatrixViewExample_forEachElement.out )
+if( BUILD_CUDA )
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
+   endforeach()
+else()
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
+   endforeach()
+endif()
 
+IF( BUILD_CUDA )
+   ADD_CUSTOM_TARGET( RunSparseMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-   ADD_EXECUTABLE( SparseMatrixExample_Constructor_init_list_1 SparseMatrixExample_Constructor_init_list_1.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_init_list_1 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_init_list_1.out
-                       OUTPUT SparseMatrixExample_Constructor_init_list_1.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_Constructor_init_list_2 SparseMatrixExample_Constructor_init_list_2.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_init_list_2 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_init_list_2.out
-                       OUTPUT SparseMatrixExample_Constructor_init_list_2.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_Constructor_rowCapacities_vector SparseMatrixExample_Constructor_rowCapacities_vector.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_rowCapacities_vector >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_rowCapacities_vector.out
-                       OUTPUT SparseMatrixExample_Constructor_rowCapacities_vector.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_Constructor_std_map SparseMatrixExample_Constructor_std_map.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_std_map >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_std_map.out
-                       OUTPUT SparseMatrixExample_Constructor_std_map.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_getSerializationType SparseMatrixExample_getSerializationType.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getSerializationType >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getSerializationType.out
-                       OUTPUT SparseMatrixExample_getSerializationType.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_setRowCapacities SparseMatrixExample_setRowCapacities.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setRowCapacities >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setRowCapacities.out
-                       OUTPUT SparseMatrixExample_setRowCapacities.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_setElements SparseMatrixExample_setElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements.out
-                       OUTPUT SparseMatrixExample_setElements.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_setElements_map SparseMatrixExample_setElements_map.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements_map >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements_map.out
-                       OUTPUT SparseMatrixExample_setElements_map.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_getCompressedRowLengths SparseMatrixExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getCompressedRowLengths.out
-                       OUTPUT SparseMatrixExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_getConstRow SparseMatrixExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getConstRow.out
-                       OUTPUT SparseMatrixExample_getConstRow.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_getRow SparseMatrixExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getRow.out
-                       OUTPUT SparseMatrixExample_getRow.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_setElement SparseMatrixExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElement.out
-                       OUTPUT SparseMatrixExample_setElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_addElement SparseMatrixExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_addElement.out
-                       OUTPUT SparseMatrixExample_addElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_getElement SparseMatrixExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_getElement.out
-                       OUTPUT SparseMatrixExample_getElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_rowsReduction SparseMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_rowsReduction.out
-                       OUTPUT SparseMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_allRowsReduction SparseMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_allRowsReduction.out
-                       OUTPUT SparseMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_forElements SparseMatrixExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_forElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_forElements.out
-                       OUTPUT SparseMatrixExample_forElements.out )
-
-   ADD_EXECUTABLE( SparseMatrixExample_forEachElement SparseMatrixExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_forEachElement.out
-                       OUTPUT SparseMatrixExample_forEachElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_getSerializationType SparseMatrixViewExample_getSerializationType.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getSerializationType >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getSerializationType.out
-                       OUTPUT SparseMatrixViewExample_getSerializationType.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_getCompressedRowLengths SparseMatrixViewExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT SparseMatrixViewExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_getConstRow SparseMatrixViewExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getConstRow.out
-                       OUTPUT SparseMatrixViewExample_getConstRow.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_getRow SparseMatrixViewExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getRow.out
-                       OUTPUT SparseMatrixViewExample_getRow.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_setElement SparseMatrixViewExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out
-                       OUTPUT SparseMatrixViewExample_setElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_addElement SparseMatrixViewExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_addElement.out
-                       OUTPUT SparseMatrixViewExample_addElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_getElement SparseMatrixViewExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_getElement.out
-                       OUTPUT SparseMatrixViewExample_getElement.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_rowsReduction SparseMatrixViewExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_rowsReduction.out
-                       OUTPUT SparseMatrixViewExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_allRowsReduction SparseMatrixViewExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_allRowsReduction.out
-                       OUTPUT SparseMatrixViewExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_forElements SparseMatrixViewExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_forElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_forElements.out
-                       OUTPUT SparseMatrixViewExample_forElements.out )
-
-   ADD_EXECUTABLE( SparseMatrixViewExample_forEachElement SparseMatrixViewExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_forEachElement.out
-                       OUTPUT SparseMatrixViewExample_forEachElement.out )
-
+   ADD_CUSTOM_TARGET( RunSparseMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
 ENDIF()
-
-ADD_CUSTOM_TARGET( RunSparseMatricesExamples ALL DEPENDS
-   SparseMatrixExample_Constructor_init_list_1.out
-   SparseMatrixExample_Constructor_init_list_2.out
-   SparseMatrixExample_Constructor_rowCapacities_vector.out
-   SparseMatrixExample_Constructor_std_map.out
-   SparseMatrixExample_getSerializationType.out
-   SparseMatrixExample_setRowCapacities.out
-   SparseMatrixExample_setElements.out
-   SparseMatrixExample_setElements_map.out
-   SparseMatrixExample_getCompressedRowLengths.out
-   SparseMatrixExample_getConstRow.out
-   SparseMatrixExample_getRow.out
-   SparseMatrixExample_setElement.out
-   SparseMatrixExample_addElement.out
-   SparseMatrixExample_getElement.out
-   SparseMatrixExample_rowsReduction.out
-   SparseMatrixExample_allRowsReduction.out
-   SparseMatrixExample_forElements.out
-   SparseMatrixExample_forEachElement.out
-   SparseMatrixViewExample_getSerializationType.out
-   SparseMatrixViewExample_getCompressedRowLengths.out
-   SparseMatrixViewExample_getConstRow.out
-   SparseMatrixViewExample_getRow.out
-   SparseMatrixViewExample_setElement.out
-   SparseMatrixViewExample_addElement.out
-   SparseMatrixViewExample_getElement.out
-   SparseMatrixViewExample_rowsReduction.out
-   SparseMatrixViewExample_allRowsReduction.out
-   SparseMatrixViewExample_forElements.out
-   SparseMatrixViewExample_forEachElement.out
-)
-
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
deleted file mode 120000
index f087b816fa0cd3b657956475bb9c4c0f3f9769dc..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cpp
similarity index 85%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cpp
index 059f0cea00d0a51323b5329fa0bb503dfec00745..c603fe32f9d345063975cbbae8ada8859c9285df 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
 
@@ -19,17 +19,17 @@ void forEachElementExample()
       }
    };
 
-   matrix.forEachElement( f );
+   matrix.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..e639034def091a09370a8c35d9bfd7acada55147
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllElements.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cu
deleted file mode 120000
index ea7c8fde8bfd9f3559f6132b315f217aafbd4f84..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b1201e59ba4d758a23b70277167a8edda89b0253
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
@@ -0,0 +1,66 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Fill the following 5x5 matrix row by row (dots represent zero matrix elements):
+    *
+    *   /  2  .  .  .  . \
+    *   |  1  2  1  .  . |
+    *   |  .  1  2  1  . |
+    *   |  .  .  1  2  1 |
+    *   \  .  .  .  .  2 /
+    */
+   const int size( 5 );
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   MatrixType matrix( { 1, 3, 3, 3, 1 }, size );   // presumably row capacities and number of columns - confirm
+   using RowView = typename MatrixType::RowView;
+
+   /***
+    * Set the matrix elements: the first and the last row get only the diagonal element.
+    */
+   auto f = [=] __cuda_callable__ ( RowView& row ) mutable {
+      const int rowIdx = row.getRowIndex();
+      if( rowIdx == 0 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element of the first row
+      else if( rowIdx == size - 1 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element of the last row
+      else
+      {
+         row.setElement( 0, rowIdx - 1, 1.0 );   // element below the diagonal
+         row.setElement( 1, rowIdx, 2.0 );        // diagonal element
+         row.setElement( 2, rowIdx + 1, 1.0 );   // element above the diagonal
+      }
+   };
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+
+   /***
+    * Divide each matrix row by the sum of all elements in the row, using the row iterators.
+    */
+   matrix.forAllRows( [=] __cuda_callable__ ( RowView& row ) mutable {
+      double sum( 0.0 );
+      for( auto element : row )
+         sum += element.value();
+      for( auto element: row )
+         element.value() /= sum;   // NOTE(review): element is a by-value copy; presumably value() still refers to the stored entry - confirm
+   } );
+   std::cout << "Divide each matrix row by a sum of all elements in the row ... " << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();   // run the example with the Host device
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();   // the Cuda variant is compiled only when HAVE_CUDA is defined
+#endif // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..87c20fbe0e9e4ca72cd80150073726e21813b0cf
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cpp
index 9ce31f09e42936f4c1a754b7cc94978a1e8ab1d4..b9b9f2c20d3f31295aa722b32dba03d9f619404d 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
       { 0, 0, 1 },
@@ -49,7 +49,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -58,10 +58,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "All rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "All rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..105390586696e621e2d510663fc9ce15c22de2dd
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cpp
index 20279888a38b10ff41dfef49a44cb4a546f19359..4fb7f8f7d8a9cf0caf40d93c028e6f91bac75715 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
       { 0, 0, 1 },
@@ -48,7 +48,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -57,10 +57,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ceacbc490c05c90e3e57dee6f00276890f51132b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu
deleted file mode 120000
index 212f162886a3d0bb77aca1b75d7596048409df8b..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu
deleted file mode 120000
index d63cf05c5786aa73da2585050804d26340c75a9f..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cpp
similarity index 85%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cpp
index 99807428dc266900713ed8b28474cc1248823ad4..4000107eb325c6abb8c8f79c379eecbdfd9f2386 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
    auto view = matrix.getView();
@@ -20,17 +20,17 @@ void forEachElementExample()
       }
    };
 
-   view.forEachElement( f );
+   view.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..3bfc383b3f75b039cc299a1389e1d368daa342fb
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllElements.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cu
deleted file mode 120000
index 2d7beae44fb64b9e90e301c15f04795f50a83f00..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixViewExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3f82fe16176b1ba8f24e1b2397a45497b52fbc8a
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   /***
+    * Set the following matrix (dots represent zero matrix elements):
+    *
+    *   /  2  .  .  .  . \
+    *   | -1  2 -1  .  . |
+    *   |  . -1  2 -1  . |
+    *   |  .  . -1  2 -1 |
+    *   \  .  .  .  .  2 /
+    */
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   const int size( 5 );
+   MatrixType matrix( { 1, 3, 3, 3, 1 }, size );
+   auto view = matrix.getView();   // the rows are traversed through this view
+   // Row functor: the first and last rows get only the diagonal, the others get three entries.
+   auto f = [=] __cuda_callable__ ( typename MatrixType::RowView& row ) mutable {
+      const int rowIdx = row.getRowIndex();
+      if( rowIdx == 0 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element
+      else if( rowIdx == size - 1 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element
+      else
+      {
+         row.setElement( 0, rowIdx - 1, -1.0 );   // elements below the diagonal
+         row.setElement( 1, rowIdx, 2.0 );        // diagonal element
+         row.setElement( 2, rowIdx + 1, -1.0 );   // elements above the diagonal
+      }
+   };
+
+   /***
+    * Set the matrix elements by applying the functor to all rows of the view.
+    */
+   view.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )   // command-line arguments are not used
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+   // The CUDA run below is compiled in only when HAVE_CUDA is defined.
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..5058dc6cfd7adb63f9d10d2699d6b9b530fd6c90
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
index 569fabb6af88457c9820aa8fb483b8f2eeb3fb70..3741b09f0f8518b8ed11a34c3980527011aefe71 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
@@ -8,17 +8,35 @@
 template< typename Device >
 void getRowExample()
 {
-   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 1, 1, 1, 1 }, 5 );
+   /***
+    * Set the following matrix (dots represent zero matrix elements):
+    *
+    *   /  2  .  .  .  . \
+    *   |  1  2  1  .  . |
+    *   |  .  1  2  1  . |
+    *   |  .  .  1  2  1 |
+    *   \  .  .  .  .  2 /
+    */
+   const int size = 5;
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 3, 3, 3, 1 }, size );
    auto view = matrix.getView();
 
-   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
-      auto row = view.getRow( rowIdx );
-      row.setElement( 0, rowIdx, 10 * ( rowIdx + 1 ) );
-   };
-
    /***
     * Set the matrix elements.
     */
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      auto row = view.getRow( rowIdx );
+      if( rowIdx == 0 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element
+      else if( rowIdx == size - 1 )
+         row.setElement( 0, rowIdx, 2.0 );        // diagonal element
+      else
+      {
+         row.setElement( 0, rowIdx - 1, 1.0 );   // elements below the diagonal
+         row.setElement( 1, rowIdx, 2.0 );        // diagonal element
+         row.setElement( 2, rowIdx + 1, 1.0 );   // elements above the diagonal
+      }
+   };
    TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f );
    std::cout << matrix << std::endl;
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cpp
similarity index 89%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cpp
index 98a03b941697c10c29609febfb5be0a01cd635cb..7ae5f07f5cda7e4a988903a09c5bf4d4daf4c7e4 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void allRowsReduction()
+void reduceAllRows()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
       { 0, 0, 1 },
@@ -50,7 +50,7 @@ void allRowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrixView.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrixView.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -59,10 +59,10 @@ void allRowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "All rows reduction on host:" << std::endl;
-   allRowsReduction< TNL::Devices::Host >();
+   reduceAllRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "All rows reduction on CUDA device:" << std::endl;
-   allRowsReduction< TNL::Devices::Cuda >();
+   reduceAllRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2af5415653756bf6cbffbd0c62acebbee04cf5ee
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cpp
similarity index 88%
rename from Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cpp
index 69e2ff6fd2cd77d06235e143b44d854da4de414b..aa4295f8a436a27d6a67e28328213d2fe139c982 100644
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
       { 0, 0, 1 },
@@ -49,7 +49,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrixView.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrixView.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -58,10 +58,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6d1c7c6c8d4c0718156f31e565ae536a699211f8
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cu
@@ -0,0 +1 @@
+SparseMatrixViewExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu
deleted file mode 120000
index f244c8372ea90bd142f8fae912ae48d32d3be0fa..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt
index 7094123bbf1be4d1067fcfc827d1167c23bd84a1..7d331938a0044999c269334ea0e4cdb55646468f 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/CMakeLists.txt
@@ -1,269 +1,51 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_Constructor_init_list_1_cuda TridiagonalMatrixExample_Constructor_init_list_1.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_Constructor_init_list_1_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_Constructor_init_list_1.out
-                       OUTPUT TridiagonalMatrixExample_Constructor_init_list_1.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getSerializationType_cuda TridiagonalMatrixExample_getSerializationType.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getSerializationType_cuda > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getSerializationType.out
-                       OUTPUT TridiagonalMatrixExample_getSerializationType.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_setElements_cuda TridiagonalMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElements_cuda > 
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElements.out
-                       OUTPUT TridiagonalMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getCompressedRowLengths_cuda TridiagonalMatrixExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getCompressedRowLengths.out
-                       OUTPUT TridiagonalMatrixExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getConstRow_cuda TridiagonalMatrixExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getConstRow.out
-                       OUTPUT TridiagonalMatrixExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getRow_cuda TridiagonalMatrixExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getRow.out
-                       OUTPUT TridiagonalMatrixExample_getRow.out )
-
+set( COMMON_EXAMPLES
+    TridiagonalMatrixExample_Constructor_init_list_1
+    TridiagonalMatrixExample_getSerializationType
+    TridiagonalMatrixExample_setElements
+    TridiagonalMatrixExample_getCompressedRowLengths
+#   TridiagonalMatrixExample_getElementsCount
+    TridiagonalMatrixExample_getConstRow
+    TridiagonalMatrixExample_getRow
 # This example does not work with nvcc 10.1. Restore it here when it works.
-#   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_setElement_cuda TridiagonalMatrixExample_setElement.cu )
-#   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement_cuda >
-#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
-#                       OUTPUT TridiagonalMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_addElement_cuda TridiagonalMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_addElement.out
-                       OUTPUT TridiagonalMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_getElement_cuda TridiagonalMatrixExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getElement.out
-                       OUTPUT TridiagonalMatrixExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_rowsReduction_cuda TridiagonalMatrixExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_rowsReduction.out
-                       OUTPUT TridiagonalMatrixExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_allRowsReduction_cuda TridiagonalMatrixExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_allRowsReduction.out
-                       OUTPUT TridiagonalMatrixExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_forElements_cuda TridiagonalMatrixExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forElements.out
-                       OUTPUT TridiagonalMatrixExample_forElements.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixExample_forEachElement_cuda TridiagonalMatrixExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forEachElement.out
-                       OUTPUT TridiagonalMatrixExample_forEachElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getCompressedRowLengths_cuda TridiagonalMatrixViewExample_getCompressedRowLengths.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getCompressedRowLengths_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT TridiagonalMatrixViewExample_getCompressedRowLengths.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getConstRow_cuda TridiagonalMatrixViewExample_getConstRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getConstRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getConstRow.out
-                       OUTPUT TridiagonalMatrixViewExample_getConstRow.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getRow_cuda TridiagonalMatrixViewExample_getRow.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getRow_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getRow.out
-                       OUTPUT TridiagonalMatrixViewExample_getRow.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_setElement_cuda TridiagonalMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_setElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_setElement.out
-                       OUTPUT TridiagonalMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_addElement_cuda TridiagonalMatrixViewExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_addElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_addElement.out
-                       OUTPUT TridiagonalMatrixViewExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_getElement_cuda TridiagonalMatrixViewExample_getElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getElement_cuda >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getElement.out
-                       OUTPUT TridiagonalMatrixViewExample_getElement.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_rowsReduction_cuda TridiagonalMatrixViewExample_rowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_rowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_rowsReduction.out
-                       OUTPUT TridiagonalMatrixViewExample_rowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_allRowsReduction_cuda TridiagonalMatrixViewExample_allRowsReduction.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_allRowsReduction_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_allRowsReduction.out
-                       OUTPUT TridiagonalMatrixViewExample_allRowsReduction.out )
-
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_forElements_cuda TridiagonalMatrixViewExample_forElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forElements_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forElements.out
-                       OUTPUT TridiagonalMatrixViewExample_forElements.out )
+#   TridiagonalMatrixExample_setElement
+    TridiagonalMatrixExample_addElement
+    TridiagonalMatrixExample_getElement
+    TridiagonalMatrixExample_reduceRows
+    TridiagonalMatrixExample_reduceAllRows
+    TridiagonalMatrixExample_forElements
+    TridiagonalMatrixExample_forAllElements
+    TridiagonalMatrixExample_forRows
+#   TridiagonalMatrixViewExample_constructor
+    TridiagonalMatrixViewExample_getCompressedRowLengths
+#   TridiagonalMatrixViewExample_getElementsCount
+    TridiagonalMatrixViewExample_getConstRow
+    TridiagonalMatrixViewExample_getRow
+    TridiagonalMatrixViewExample_setElement
+    TridiagonalMatrixViewExample_addElement
+    TridiagonalMatrixViewExample_getElement
+    TridiagonalMatrixViewExample_reduceRows
+    TridiagonalMatrixViewExample_reduceAllRows
+    TridiagonalMatrixViewExample_forElements
+    TridiagonalMatrixViewExample_forAllElements
+    TridiagonalMatrixViewExample_forRows
+)
 
-   CUDA_ADD_EXECUTABLE( TridiagonalMatrixViewExample_forEachElement_cuda TridiagonalMatrixViewExample_forEachElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forEachElement_cuda >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forEachElement.out
-                       OUTPUT TridiagonalMatrixViewExample_forEachElement.out )
+if( BUILD_CUDA )   # CUDA build: each example is compiled from its .cu source as <name>-cuda
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )   # run the example, redirect stdout into the snippets directory
+      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )   # append this output to the CUDA_OUTPUTS list
+   endforeach()
+else()   # non-CUDA build: the same examples are compiled from their .cpp sources
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )   # run the example, redirect stdout into the snippets directory
+      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )   # append this output to the HOST_OUTPUTS list
+   endforeach()
+endif()
 
+IF( BUILD_CUDA )
+   ADD_CUSTOM_TARGET( RunTridiagonalMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_Constructor_init_list_1 TridiagonalMatrixExample_Constructor_init_list_1.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_Constructor_init_list_1 >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_Constructor_init_list_1.out
-                       OUTPUT TridiagonalMatrixExample_Constructor_init_list_1.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_getSerializationType TridiagonalMatrixExample_getSerializationType.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getSerializationType >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getSerializationType.out
-                       OUTPUT TridiagonalMatrixExample_getSerializationType.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_setElements TridiagonalMatrixExample_setElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElements >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElements.out
-                       OUTPUT TridiagonalMatrixExample_setElements.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_getCompressedRowLengths TridiagonalMatrixExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getCompressedRowLengths.out
-                       OUTPUT TridiagonalMatrixExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_getConstRow TridiagonalMatrixExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getConstRow.out
-                       OUTPUT TridiagonalMatrixExample_getConstRow.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_getRow TridiagonalMatrixExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getRow.out
-                       OUTPUT TridiagonalMatrixExample_getRow.out )
-
-#  This example does not work with nvcc 10.1. Restore it here when it works.
-#   ADD_EXECUTABLE( TridiagonalMatrixExample_setElement TridiagonalMatrixExample_setElement.cpp )
-#   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement >
-#                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
-#                       OUTPUT TridiagonalMatrixExample_setElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_addElement TridiagonalMatrixExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_addElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_addElement.out
-                       OUTPUT TridiagonalMatrixExample_addElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_getElement TridiagonalMatrixExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_getElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_getElement.out
-                       OUTPUT TridiagonalMatrixExample_getElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_rowsReduction TridiagonalMatrixExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_rowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_rowsReduction.out
-                       OUTPUT TridiagonalMatrixExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_allRowsReduction TridiagonalMatrixExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_allRowsReduction >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_allRowsReduction.out
-                       OUTPUT TridiagonalMatrixExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_forElements TridiagonalMatrixExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forElements.out
-                       OUTPUT TridiagonalMatrixExample_forElements.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_forEachElement TridiagonalMatrixExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_forEachElement.out
-                       OUTPUT TridiagonalMatrixExample_forEachElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getCompressedRowLengths TridiagonalMatrixViewExample_getCompressedRowLengths.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getCompressedRowLengths >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getCompressedRowLengths.out
-                       OUTPUT TridiagonalMatrixViewExample_getCompressedRowLengths.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getConstRow TridiagonalMatrixViewExample_getConstRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getConstRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getConstRow.out
-                       OUTPUT TridiagonalMatrixViewExample_getConstRow.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getRow TridiagonalMatrixViewExample_getRow.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getRow >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getRow.out
-                       OUTPUT TridiagonalMatrixViewExample_getRow.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_setElement TridiagonalMatrixViewExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_setElement.out
-                       OUTPUT TridiagonalMatrixViewExample_setElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_addElement TridiagonalMatrixViewExample_addElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_addElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_addElement.out
-                       OUTPUT TridiagonalMatrixViewExample_addElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_getElement TridiagonalMatrixViewExample_getElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_getElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_getElement.out
-                       OUTPUT TridiagonalMatrixViewExample_getElement.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_rowsReduction TridiagonalMatrixViewExample_rowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_rowsReduction >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_rowsReduction.out
-                       OUTPUT TridiagonalMatrixViewExample_rowsReduction.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_allRowsReduction TridiagonalMatrixViewExample_allRowsReduction.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_allRowsReduction >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_allRowsReduction.out
-                       OUTPUT TridiagonalMatrixViewExample_allRowsReduction.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_forElements TridiagonalMatrixViewExample_forElements.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forElements.out
-                       OUTPUT TridiagonalMatrixViewExample_forElements.out )
-
-   ADD_EXECUTABLE( TridiagonalMatrixViewExample_forEachElement TridiagonalMatrixViewExample_forEachElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixViewExample_forEachElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixViewExample_forEachElement.out
-                       OUTPUT TridiagonalMatrixViewExample_forEachElement.out )
-
+   ADD_CUSTOM_TARGET( RunTridiagonalMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
 ENDIF()
-
-   ADD_EXECUTABLE( TridiagonalMatrixExample_setElement TridiagonalMatrixExample_setElement.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
-                       OUTPUT TridiagonalMatrixExample_setElement.out )
-
-
-ADD_CUSTOM_TARGET( RunTridiagonalMatricesExamples ALL DEPENDS
-   TridiagonalMatrixExample_Constructor_init_list_1.out
-   TridiagonalMatrixExample_getSerializationType.out
-   TridiagonalMatrixExample_setElements.out
-   TridiagonalMatrixExample_getCompressedRowLengths.out
-   TridiagonalMatrixExample_getConstRow.out
-   TridiagonalMatrixExample_getRow.out
-   TridiagonalMatrixExample_setElement.out
-   TridiagonalMatrixExample_addElement.out
-   TridiagonalMatrixExample_getElement.out
-   TridiagonalMatrixExample_rowsReduction.out
-   TridiagonalMatrixExample_allRowsReduction.out
-   TridiagonalMatrixExample_forElements.out
-   TridiagonalMatrixExample_forEachElement.out
-   TridiagonalMatrixViewExample_getCompressedRowLengths.out
-   TridiagonalMatrixViewExample_getConstRow.out
-   TridiagonalMatrixViewExample_getRow.out
-   TridiagonalMatrixViewExample_setElement.out
-   TridiagonalMatrixViewExample_addElement.out
-   TridiagonalMatrixViewExample_getElement.out
-   TridiagonalMatrixViewExample_rowsReduction.out
-   TridiagonalMatrixViewExample_allRowsReduction.out
-   TridiagonalMatrixViewExample_forElements.out
-   TridiagonalMatrixViewExample_forEachElement.out
-)
-
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu
deleted file mode 120000
index 795c4febffbbb109b99553e8437c33db942bcab7..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cpp
similarity index 91%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cpp
index 93b56f850fdeb4517c3473b3f160cd79cea2ae30..c29b439a6b85274f863b110e80aea7ca8537fbb3 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
@@ -37,17 +37,17 @@ void forEachElementExample()
        */
       value = 3 - localIdx;
    };
-   matrix.forEachElement( f );
+   matrix.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..704b370b723d4b6462fcb5e090005cbb5f8926b3
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllElements.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cu
deleted file mode 120000
index 13b73c374db70b1a0c11a4617bb280bf8fc41543..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ae4f32644154902371fe4bd14b3aa9cec427113
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
@@ -0,0 +1,60 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   using RowView = typename MatrixType::RowView;
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    *    0 /  2 -1  .  .  . \    -> {  0, 2, -1 }
+    *      | -1  2 -1  .  . |    -> { -1, 2, -1 }
+    *      |  . -1  2 -1  . |    -> { -1, 2, -1 }
+    *      |  .  . -1  2 -1 |    -> { -1, 2, -1 }
+    *      \  .  .  . -1  2 / 0  -> { -1, 2,  0 }
+    *
+    */
+    const int size = 5;
+    MatrixType matrix( size, size );
+
+   auto f = [=] __cuda_callable__ ( RowView& row ) {
+      const int& rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < size - 1 )        // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+   matrix.forAllRows( f );  // f receives a RowView and fills that row
+   std::cout << matrix << std::endl;
+
+   /***
+    * Sum the absolute values of the elements in each row and store them in a vector.
+    */
+   TNL::Containers::Vector< double, Device > sum_vector( size );
+   auto sum_view = sum_vector.getView();  // the lambda below captures this view by value
+   matrix.forAllRows( [=] __cuda_callable__ ( RowView& row ) mutable {
+      double sum( 0.0 );
+      for( auto element : row )
+         sum += TNL::abs( element.value() );
+      sum_view[ row.getRowIndex() ] = sum;  // one result per row, indexed by the row index
+   } );
+
+   std::cout << "Sums in matrix rows = " << sum_vector << std::endl;
+}
+
+int main( int argc, char* argv[] )  // runs forRowsExample on the host and, if HAVE_CUDA is defined, on CUDA
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif  // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..a187b1e67da9619090be45c2ec69f6709bac9b88
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cpp
index 043dcc82efc2203f8f9b52a4a90a70d7a1e25ae9..65acc3c47760173983b2b941543314ce3189d378 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cpp
@@ -5,24 +5,24 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
     *    | 2  1  3  .  . |   -> { 2, 1, 3 }
     *    | .  2  1  3  . |   -> { 2, 1, 3 }
     *    | .  .  2  1  3 |   -> { 2, 1, 3 }
-    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
-    * 
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    *
     */
    TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { { 0, 1, 3 },  // matrix elements
-        { 2, 1, 3 }, 
-        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 },
         { 2, 1, 3 },
         { 2, 1, 3 } } );
 
@@ -60,7 +60,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -69,10 +69,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..e8001f966e25f5258a571e0092a54e24336011b9
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cpp
similarity index 85%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cpp
index aae0bd4e3adf87a37090a05d934328ec2e641204..c985e12b7e71df3a47dd3df6ffadb0eef48d7e87 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cpp
@@ -5,24 +5,24 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
     *    | 2  1  3  .  . |   -> { 2, 1, 3 }
     *    | .  2  1  3  . |   -> { 2, 1, 3 }
     *    | .  .  2  1  3 |   -> { 2, 1, 3 }
-    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
-    * 
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    *
     */
    TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { { 0, 1, 3 },  // matrix elements
-        { 2, 1, 3 }, 
-        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 },
         { 2, 1, 3 },
         { 2, 1, 3 } } );
 
@@ -60,7 +60,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -69,10 +69,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7b5585d88cf932b7f79d8f19f8dcfe99f2783706
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu
deleted file mode 120000
index 69a58007cbcbfcf49bf5678a9b208274cb6de91b..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu
deleted file mode 120000
index 7b330650fc19e8db1bd2c844ef405e2b79f3a133..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixViewExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cpp
similarity index 91%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cpp
index a3a48223077da9ed4c7a9845847ebac70f1bc315..0ef4304623f9d5e1a6330c04c46ef30332f9fd89 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cpp
@@ -4,7 +4,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void forEachElementExample()
+void forAllElementsExample()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
@@ -38,17 +38,17 @@ void forEachElementExample()
        */
       value = 3 - localIdx;
    };
-   view.forEachElement( f );
+   view.forAllElements( f );
    std::cout << matrix << std::endl;
 }
 
 int main( int argc, char* argv[] )
 {
    std::cout << "Creating matrix on host: " << std::endl;
-   forEachElementExample< TNL::Devices::Host >();
+   forAllElementsExample< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Creating matrix on CUDA device: " << std::endl;
-   forEachElementExample< TNL::Devices::Cuda >();
+   forAllElementsExample< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4651fbd7fe30c5c08106c933ad8ff1a054b893bd
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllElements.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_forAllElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cu
deleted file mode 120000
index 98972cb8b5a6b04ab28b71f6e6953cb1b3f34734..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forEachElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixViewExample_forEachElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5607fac633a1c72afeeff57fe733663fc87e160d
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
@@ -0,0 +1,46 @@
+#include <iostream>
+#include <TNL/Matrices/TridiagonalMatrix.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void forRowsExample()
+{
+   using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    *    0 /  2 -1  .  .  . \    -> {  0, 2, -1 }
+    *      | -1  2 -1  .  . |    -> { -1, 2, -1 }
+    *      |  . -1  2 -1  . |    -> { -1, 2, -1 }
+    *      |  .  . -1  2 -1 |    -> { -1, 2, -1 }
+    *      \  .  .  . -1  2 / 0  -> { -1, 2,  0 }
+    *
+    * The diagonal offsets are { -1, 0, 1 }.
+    */
+    const int size = 5;
+    MatrixType matrix( size, size );
+
+   auto f = [=] __cuda_callable__ ( typename MatrixType::RowView& row ) {
+      const int& rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )
+         row.setElement( 0, -1.0 );  // elements below the diagonal
+      row.setElement( 1, 2.0 );      // elements on the diagonal
+      if( rowIdx < size - 1 )        // elements above the diagonal
+         row.setElement( 2, -1.0 );
+   };
+   matrix.forAllRows( f );  // NOTE(review): this View example calls forAllRows on the matrix, not on matrix.getView() - confirm intent
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )  // runs forRowsExample on the host and, if HAVE_CUDA is defined, on CUDA
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif  // HAVE_CUDA
+}
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..ea70e5b9e29793bbfda1ea1eb88b61bfa141eb41
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
index 641149e050ffe738576b63a73f1cc792457ebf56..8f8fe0b6954562070d47506884b880f8354a38b4 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
@@ -7,11 +7,23 @@
 template< typename Device >
 void getRowExample()
 {
+   /***
+    * Set the following matrix (dots represent zero matrix elements and zeros are
+    * padding zeros for memory alignment):
+    *
+    *    0 /  2 -1  .  .  . \    -> {  0, 2, -1 }
+    *      | -1  2 -1  .  . |    -> { -1, 2, -1 }
+    *      |  . -1  2 -1  . |    -> { -1, 2, -1 }
+    *      |  .  . -1  2 -1 |    -> { -1, 2, -1 }
+    *      \  .  .  . -1  2 / 0  -> { -1, 2,  0 }
+    *
+    */
+
    const int matrixSize( 5 );
    using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >;
    MatrixType matrix(
       matrixSize,  // number of matrix rows
-      matrixSize  // number of matrix columns
+      matrixSize   // number of matrix columns
    );
    auto view = matrix.getView();
 
@@ -19,7 +31,7 @@ void getRowExample()
       auto row = view.getRow( rowIdx );
 
       if( rowIdx > 0 )
-         row.setElement( 0, -1.0 );  // elements below the diagonal
+         row.setElement( 0, -1.0 );   // elements below the diagonal
       row.setElement( 1, 2.0 );      // elements on the diagonal
       if( rowIdx < matrixSize - 1 )  // elements above the diagonal
          row.setElement( 2, -1.0 );
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cpp
index bacb98beecc1ac87be73302896e60ef8e498bc4d..bef8037811ed29f7f2c76b2ca1f31f7225e81c8d 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cpp
@@ -5,24 +5,24 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
     *    | 2  1  3  .  . |   -> { 2, 1, 3 }
     *    | .  2  1  3  . |   -> { 2, 1, 3 }
     *    | .  .  2  1  3 |   -> { 2, 1, 3 }
-    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
-    * 
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    *
     */
    TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { { 0, 1, 3 },  // matrix elements
-        { 2, 1, 3 }, 
-        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 },
         { 2, 1, 3 },
         { 2, 1, 3 } } );
    auto view = matrix.getView();
@@ -61,7 +61,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   view.reduceAllRows( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -70,10 +70,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..6fc41327c846a03f0110b894e6341c0971655a4b
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_reduceAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cpp
similarity index 86%
rename from Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
rename to Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cpp
index fa345292ce4aa5fc629225d910ca90a77bf6be07..89068d48a618f207cdce624c77c84e061e929d5d 100644
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cpp
@@ -5,24 +5,24 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    /***
     * Set the following matrix (dots represent zero matrix elements and zeros are
     * padding zeros for memory alignment):
-    * 
+    *
     *  0 / 1  3  .  .  . \   -> { 0, 1, 3 }
     *    | 2  1  3  .  . |   -> { 2, 1, 3 }
     *    | .  2  1  3  . |   -> { 2, 1, 3 }
     *    | .  .  2  1  3 |   -> { 2, 1, 3 }
-    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 } 
-    * 
+    *    \ .  .  .  2  1 / 0 -> { 2, 1, 0 }
+    *
     */
    TNL::Matrices::TridiagonalMatrix< double, Device > matrix (
       5,              // number of matrix columns
       { { 0, 1, 3 },  // matrix elements
-        { 2, 1, 3 }, 
-        { 2, 1, 3 }, 
+        { 2, 1, 3 },
+        { 2, 1, 3 },
         { 2, 1, 3 },
         { 2, 1, 3 } } );
    auto view = matrix.getView();
@@ -61,7 +61,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   view.rowsReduction( 0, view.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   view.reduceRows( 0, view.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
@@ -70,10 +70,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..13f298463c15ab7e471faa75d0de73cdf87d09f8
--- /dev/null
+++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cu
@@ -0,0 +1 @@
+TridiagonalMatrixViewExample_reduceRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu
deleted file mode 120000
index f749c1ef47ec34830d51be3e3dd39d32347b3c8f..0000000000000000000000000000000000000000
--- a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu
+++ /dev/null
@@ -1 +0,0 @@
-TridiagonalMatrixViewExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Arrays/ArrayViewForElements.cpp b/Documentation/Tutorials/Arrays/ArrayViewForElements.cpp
index a78d27b8081f7c2c00faa8d5405453a8649d5ecb..c4ed37bf16ed06eb9a1067cef5ccee88fc0e81ce 100644
--- a/Documentation/Tutorials/Arrays/ArrayViewForElements.cpp
+++ b/Documentation/Tutorials/Arrays/ArrayViewForElements.cpp
@@ -18,7 +18,7 @@ int main( int argc, char* argv[] )
     * Create an ArrayView and use it for initiation
     */
    auto a_view = a.getView();
-   a_view.forEachElement( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Initiate elements of b with indexes 0-4 using a_view
diff --git a/Documentation/Tutorials/Arrays/ContainsValue.cpp b/Documentation/Tutorials/Arrays/ContainsValue.cpp
index 6211e26b8fa36e4e216e3f787d0e4f928525715f..4b726a7bd8a21c32699effd10bb58046dac976a5 100644
--- a/Documentation/Tutorials/Arrays/ContainsValue.cpp
+++ b/Documentation/Tutorials/Arrays/ContainsValue.cpp
@@ -13,7 +13,7 @@ int main( int argc, char* argv[] )
    const int size = 10;
    Array< float, Devices::Cuda > a( size ), b( size );
    a = 0;
-   b.forEachElement( [=] __cuda_callable__ ( int i, float& value ) { value = i; } );
+   b.forAllElements( [=] __cuda_callable__ ( int i, float& value ) { value = i; } );
 
    /****
     * Test the values stored in the arrays
diff --git a/Documentation/Tutorials/Arrays/tutorial_Arrays.md b/Documentation/Tutorials/Arrays/tutorial_Arrays.md
index fd6552df3996045f027fb68bd05953f1b59a2795..ad3016411567666b55ed7eeb375daf7286d3c9f5 100644
--- a/Documentation/Tutorials/Arrays/tutorial_Arrays.md
+++ b/Documentation/Tutorials/Arrays/tutorial_Arrays.md
@@ -84,7 +84,7 @@ Output:
 
 ### Arrays and parallel for
 
-More efficient and still quite simple method for (not only) array elements initiation is with the use of C++ lambda functions and methods `forElements` and `forEachElement`. As an argument a lambda function is passed which is then applied for all elements. Optionally one may define only subinterval of element indexes where the lambda shall be applied. If the underlying array is allocated on GPU, the lambda function is called from CUDA kernel. This is why it is more efficient than use of `setElement`. On the other hand, one must be careful to use only `__cuda_callable__` methods inside the lambda. The use of the methods `forElements` and `forEachElement` is demonstrated in the following example.
+A more efficient and still quite simple way to initialize (not only) array elements is to use C++ lambda functions together with the methods `forElements` and `forAllElements`. A lambda function is passed as an argument and is then applied to all elements. Optionally, one may specify only a subinterval of element indexes to which the lambda shall be applied. If the underlying array is allocated on the GPU, the lambda function is called from a CUDA kernel. This is why it is more efficient than using `setElement`. On the other hand, one must be careful to use only `__cuda_callable__` methods inside the lambda. The use of the methods `forElements` and `forAllElements` is demonstrated in the following example.
 
 \include ArrayExample_forElements.cpp
 
diff --git a/Documentation/Tutorials/ForLoops/ParallelForExample_ug.cpp b/Documentation/Tutorials/ForLoops/ParallelForExample_ug.cpp
index a9ff6afb619eef3535791bf515007537d5dc89c0..cf91d69ed25f3c51d33eeae677d63fd628a83708 100644
--- a/Documentation/Tutorials/ForLoops/ParallelForExample_ug.cpp
+++ b/Documentation/Tutorials/ForLoops/ParallelForExample_ug.cpp
@@ -35,7 +35,7 @@ int main( int argc, char* argv[] )
     */
    Vector< double, Devices::Host > host_v1( 10 ), host_v2( 10 ), host_result( 10 );
    host_v1 = 1.0;
-   host_v2.forEachElement( []__cuda_callable__ ( int i, double& value ) { value = i; } );
+   host_v2.forAllElements( []__cuda_callable__ ( int i, double& value ) { value = i; } );
    vectorSum( host_v1, host_v2, 2.0, host_result );
    std::cout << "host_v1 = " << host_v1 << std::endl;
    std::cout << "host_v2 = " << host_v2 << std::endl;
@@ -47,7 +47,7 @@ int main( int argc, char* argv[] )
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_v1( 10 ), cuda_v2( 10 ), cuda_result( 10 );
    cuda_v1 = 1.0;
-   cuda_v2.forEachElement( []__cuda_callable__ ( int i, double& value ) { value = i; } );
+   cuda_v2.forAllElements( []__cuda_callable__ ( int i, double& value ) { value = i; } );
    vectorSum( cuda_v1, cuda_v2, 2.0, cuda_result );
    std::cout << "cuda_v1 = " << cuda_v1 << std::endl;
    std::cout << "cuda_v2 = " << cuda_v2 << std::endl;
diff --git a/Documentation/Tutorials/Matrices/CMakeLists.txt b/Documentation/Tutorials/Matrices/CMakeLists.txt
index 7e3b2b2107b6027a4f115fcbbd2a2cb33538b43d..70b3475d8e8edba185f34843f9f757d4c0c7ecaa 100644
--- a/Documentation/Tutorials/Matrices/CMakeLists.txt
+++ b/Documentation/Tutorials/Matrices/CMakeLists.txt
@@ -1,152 +1,48 @@
-IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out 
-                       OUTPUT DenseMatrixExample_Constructor_init_list.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_addElement DenseMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out
-                       OUTPUT DenseMatrixExample_addElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElement DenseMatrixExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out
-                       OUTPUT DenseMatrixExample_setElement.out )
-
-   #CUDA_ADD_EXECUTABLE( DenseMatrixExample_forRows DenseMatrixExample_forRows.cu )
-   #ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows >
-   #                    ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out
-   #                    OUTPUT DenseMatrixExample_forRows.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_vectorProduct DenseMatrixExample_rowsReduction_vectorProduct.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_vectorProduct >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction_vectorProduct.out
-                       OUTPUT DenseMatrixExample_rowsReduction_vectorProduct.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_maxNorm DenseMatrixExample_rowsReduction_maxNorm.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_maxNorm >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction_maxNorm.out
-                       OUTPUT DenseMatrixExample_rowsReduction_maxNorm.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_setElement DenseMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_setElement.out OUTPUT
-                       DenseMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_data_encapsulation DenseMatrixViewExample_data_encapsulation.cu )
-   ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_data_encapsulation >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_data_encapsulation.out OUTPUT
-                       DenseMatrixViewExample_data_encapsulation.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_init_list_2 SparseMatrixExample_Constructor_init_list_2.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_init_list_2 >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_init_list_2.out
-                       OUTPUT SparseMatrixExample_Constructor_init_list_2.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setRowCapacities SparseMatrixExample_setRowCapacities.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setRowCapacities >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setRowCapacities.out
-                       OUTPUT SparseMatrixExample_setRowCapacities.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_rowCapacities_vector SparseMatrixExample_Constructor_rowCapacities_vector.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_rowCapacities_vector >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_rowCapacities_vector.out
-                       OUTPUT SparseMatrixExample_Constructor_rowCapacities_vector.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_Constructor_std_map SparseMatrixExample_Constructor_std_map.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_Constructor_std_map >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_Constructor_std_map.out
-                       OUTPUT SparseMatrixExample_Constructor_std_map.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElements SparseMatrixExample_setElements.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements.out
-                       OUTPUT SparseMatrixExample_setElements.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElements_map SparseMatrixExample_setElements_map.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElements_map >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElements_map.out
-                       OUTPUT SparseMatrixExample_setElements_map.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_setElement SparseMatrixExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_setElement.out
-                       OUTPUT SparseMatrixExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_addElement SparseMatrixExample_addElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_addElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_addElement.out
-                       OUTPUT SparseMatrixExample_addElement.out )
-
-#   CUDA_ADD_EXECUTABLE( SparseMatrixExample_forRows SparseMatrixExample_forRows.cu )
-#   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_forRows >
-#                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_forRows.out
-#                       OUTPUT SparseMatrixExample_forRows.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixExample_rowsReduction_vectorProduct SparseMatrixExample_rowsReduction_vectorProduct.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixExample_rowsReduction_vectorProduct >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixExample_rowsReduction_vectorProduct.out
-                       OUTPUT SparseMatrixExample_rowsReduction_vectorProduct.out )
-
-   CUDA_ADD_EXECUTABLE( SparseMatrixViewExample_setElement SparseMatrixViewExample_setElement.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SparseMatrixViewExample_setElement >
-                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out
-                       OUTPUT SparseMatrixViewExample_setElement.out )
-
-   CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixExample SymmetricSparseMatrixExample.cu )
-   ADD_CUSTOM_COMMAND( COMMAND SymmetricSparseMatrixExample >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SymmetricSparseMatrixExample.out
-                        OUTPUT SymmetricSparseMatrixExample.out )
-
-   CUDA_ADD_EXECUTABLE( BinarySparseMatrixExample BinarySparseMatrixExample.cu )
-   ADD_CUSTOM_COMMAND( COMMAND BinarySparseMatrixExample >
-                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/BinarySparseMatrixExample.out
-                        OUTPUT BinarySparseMatrixExample.out )
-
+set( COMMON_EXAMPLES
+   BinarySparseMatrixExample
+   DenseMatrixExample_reduceRows_maxNorm
+   DenseMatrixExample_reduceRows_vectorProduct
+   DenseMatrixViewExample_data_encapsulation
+   SparseMatrixExample_reduceRows_vectorProduct
+   SymmetricSparseMatrixExample
+)
 
-   ####
-   # THe following examples/benchmarks run for very long time
-   CUDA_ADD_EXECUTABLE( DenseMatrixSetup_Benchmark_cuda DenseMatrixSetup_Benchmark.cu )
-   CUDA_ADD_EXECUTABLE( SparseMatrixSetup_Benchmark_cuda SparseMatrixSetup_Benchmark.cu )
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark_cuda MultidiagonalMatrixSetup_Benchmark.cu )
+####
+# The following examples/benchmarks run for a very long time.
+# We only build them and do not run them automatically.
+set( LONG_EXAMPLES
+   DenseMatrixSetup_Benchmark
+   MultidiagonalMatrixSetup_Benchmark
+   SparseMatrixSetup_Benchmark
+)
 
-ELSE()
+if( BUILD_CUDA )
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
+   endforeach()
+   foreach( target IN ITEMS ${LONG_EXAMPLES} )
+      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
+      #add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      #set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
+   endforeach()
+else()
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
+   endforeach()
+   foreach( target IN ITEMS ${LONG_EXAMPLES} )
+      add_executable( ${target} ${target}.cpp )
+      #add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
+      #set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
+   endforeach()
+endif()
 
-   ####
-   # THe following examples/benchmarks run for very long time
-   ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark.cpp )
-   ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark.cpp )
-   ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark.cpp )
-ENDIF()
 
 IF( BUILD_CUDA )
-ADD_CUSTOM_TARGET( TutorialsMatricesCuda ALL DEPENDS
-   DenseMatrixExample_Constructor_init_list.out
-   DenseMatrixExample_addElement.out
-   DenseMatrixExample_setElement.out
-#   DenseMatrixExample_forRows.out
-   DenseMatrixExample_rowsReduction_vectorProduct.out
-   DenseMatrixExample_rowsReduction_maxNorm.out
-   DenseMatrixViewExample_setElement.out
-   DenseMatrixViewExample_data_encapsulation.out
-   SparseMatrixExample_Constructor_init_list_2.out
-   SparseMatrixExample_setRowCapacities.out
-   SparseMatrixExample_Constructor_std_map.out
-   SparseMatrixExample_setElements.out
-   SparseMatrixExample_setElements_map.out
-   SparseMatrixExample_setElement.out
-#   SparseMatrixExample_forRows.out
-   SparseMatrixExample_rowsReduction_vectorProduct.out
-   SparseMatrixViewExample_setElement.out
-   SymmetricSparseMatrixExample.out
-   BinarySparseMatrixExample.out
- )
+   ADD_CUSTOM_TARGET( RunTutorialsMatricesExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
 ELSE()
-ADD_CUSTOM_TARGET( TutorialsMatrices ALL DEPENDS
-)
-ENDIF()
-#
-#ADD_CUSTOM_TARGET( TutorialsPointers ALL DEPENDS
-#   UniquePointerHostExample.out
-#)
\ No newline at end of file
+   ADD_CUSTOM_TARGET( RunTutorialsMatricesExamples ALL DEPENDS ${HOST_OUTPUTS} )
+ENDIF()
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cpp b/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cpp
deleted file mode 120000
index faa270f15a4dc3c4e835a1fc7888d40cd26f4b59..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cu
deleted file mode 120000
index e633e76a9b80504e7a2b750eb43655f9215820b8..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_Constructor_init_list.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cpp b/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cpp
deleted file mode 120000
index c471b0ce3155b8942a78da330767e5bda04259be..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cu
deleted file mode 120000
index 67dd6dced3eb6d3e7149be4907542b77f8326a36..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_addElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cpp b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cpp
similarity index 88%
rename from Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cpp
rename to Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cpp
index a1837ebc7f9769fedf3fd03456f39130519d76a9..42bfd4800884c9c66b33b9a5d3af4489eb62d054 100644
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cpp
+++ b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cpp
@@ -5,7 +5,7 @@
 #include <TNL/Devices/Host.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -48,7 +48,7 @@ void rowsReduction()
    /***
     * Compute the largest values in each row.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() );
 
    std::cout << "Max. elements in rows are: " << rowMax << std::endl;
    std::cout << "Max. matrix norm is: " << TNL::max( rowMax ) << std::endl;
@@ -57,10 +57,10 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cu
new file mode 120000
index 0000000000000000000000000000000000000000..d58eacc4572a0b247706c6691bc5e5c0c9fa109b
--- /dev/null
+++ b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_maxNorm.cu
@@ -0,0 +1 @@
+DenseMatrixExample_reduceRows_maxNorm.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cpp b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cpp
similarity index 90%
rename from Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cpp
rename to Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cpp
index 1dcef95dda4907ef1658a2f0dac4ed06ef38cf1d..27d4956a30b37820d67c99ea68ac35ccc3a742a3 100644
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cpp
+++ b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::DenseMatrix< double, Device > matrix {
       {  1,  0,  0,  0,  0 },
@@ -50,7 +50,7 @@ void rowsReduction()
    /***
     * Compute matrix-vector product.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, std::plus<>{}, keep, 0.0 );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, std::plus<>{}, keep, 0.0 );
 
    std::cout << "The matrix reads as:" << std::endl << matrix << std::endl;
    std::cout << "The input vector is:" << x << std::endl;
@@ -60,11 +60,11 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << std::endl;
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cu
new file mode 120000
index 0000000000000000000000000000000000000000..7ba31dc3b39495b06cc8b0eddbeae540e0167c94
--- /dev/null
+++ b/Documentation/Tutorials/Matrices/DenseMatrixExample_reduceRows_vectorProduct.cu
@@ -0,0 +1 @@
+DenseMatrixExample_reduceRows_vectorProduct.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cu
deleted file mode 120000
index 04b5e78e1eb43ca3ee016e86de75333c73241041..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_maxNorm.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixExample_rowsReduction_maxNorm.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cu
deleted file mode 120000
index 36e05a773c60ccd436358108bc9e0ef6362ad100..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_rowsReduction_vectorProduct.cu
+++ /dev/null
@@ -1 +0,0 @@
-DenseMatrixExample_rowsReduction_vectorProduct.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cpp b/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cpp
deleted file mode 120000
index cb68721bb18b354fee17a97b3b9ef3815bd54746..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cu b/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cu
deleted file mode 120000
index 79539e197f7448949ffe2cde01ccee65348454f6..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixExample_setElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp b/Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp
index 9b346d7be7da01b297dc1f2440c3702d98e4ff97..7696e9d0d7e99e750b6f6d68e63d013c2938655e 100644
--- a/Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp
+++ b/Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp
@@ -52,7 +52,7 @@ void getRow( const int matrixSize, Matrix& matrix )
    auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
       auto row = matrixView.getRow( rowIdx );
       for( int i = 0; i < matrixSize; i++ )
-         row.setElement( i, rowIdx + i );
+         row.setValue( i, rowIdx + i );
    };
    TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f );
 }
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_data_encapsulation.cpp b/Documentation/Tutorials/Matrices/DenseMatrixViewExample_data_encapsulation.cpp
index 99cf67583994d0de03487d213ce5bf53a378edf1..79fb9b3d900a2c13b4f3d7eb22fafce2aaf5958c 100644
--- a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_data_encapsulation.cpp
+++ b/Documentation/Tutorials/Matrices/DenseMatrixViewExample_data_encapsulation.cpp
@@ -49,6 +49,17 @@ void encapsulation()
 
    std::cout << "Dense matrix view after elements manipulation:" << std::endl;
    std::cout << matrix << std::endl;
+
+   /***
+    * Do not forget to free allocated memory :)
+    */
+   delete[] host_data;
+   if( std::is_same< Device, TNL::Devices::Host >::value )
+      delete[] data;
+#ifdef HAVE_CUDA
+   else if( std::is_same< Device, TNL::Devices::Cuda >::value )
+      cudaFree( data );
+#endif
 }
 
 int main( int argc, char* argv[] )
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cpp b/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cpp
deleted file mode 120000
index a3832e2e8dee2e173b044a2039efef818676a41a..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cu b/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cu
deleted file mode 120000
index 9d1266dd3a5795d35fb97da20c24b91d6208cad2..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/DenseMatrixViewExample_setElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor.cpp b/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor.cpp
deleted file mode 120000
index da76904277e43a262fcd293b44defacaee4b96ff..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor_init_list_1.cpp b/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
deleted file mode 120000
index 1e5ca52b04d64ba3560dbbd682f7bc6b55356bd3..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp b/Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp
index 5743c5e3279fcdc65e9a0cedb8cb9e005bbfcdd8..d323105cd08972c2ad4d3aba6bde982e38374948 100644
--- a/Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp
+++ b/Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp
@@ -146,7 +146,7 @@ void forElements( const int gridSize, Matrix& matrix )
    auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, float& value, bool& compute ) mutable {
       const int i = rowIdx % gridSize;
       const int j = rowIdx / gridSize;
-      if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 && localIdx == 0 )
+      if( ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) && localIdx == 0 )
       {
          columnIdx = rowIdx;
          value = 1.0;
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cpp
deleted file mode 120000
index 9d23bbb1c8210f48cb7fc30ee6017e30b8139f7d..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cu
deleted file mode 120000
index 759f1a1ca5a3942d37c35f5f28e3774095008067..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_init_list_2.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cpp
deleted file mode 120000
index ddeed9a7bc3fe6628aee27b3c0f8fcbf8fb48645..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_rowCapacities_vector.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cu
deleted file mode 120000
index 5957448c64b03905dcd5c9e7935d275e33f6bedb..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_rowCapacities_vector.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_rowCapacities_vector.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cpp
deleted file mode 120000
index dcc4ec9aef3a7b7e53fafee9e4738304f68f33b7..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cu
deleted file mode 120000
index 75a75befb2cda5813fb7d01d1eb4681fb47b4528..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_Constructor_std_map.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cpp
deleted file mode 120000
index 215dde5df06dfa5c541b862359267f2e83efccb9..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cu
deleted file mode 120000
index c2425241fcb6bb03cfe57b09dbd70b1eaeca7f0a..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_addElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cpp
similarity index 90%
rename from Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cpp
rename to Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cpp
index dd72230feab644ab6f0593f45e837de08e159721..ff0cde7d4c4f6a760a0cde22cbe5c683d28467c6 100644
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cpp
+++ b/Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cpp
@@ -6,7 +6,7 @@
 #include <TNL/Devices/Cuda.h>
 
 template< typename Device >
-void rowsReduction()
+void reduceRows()
 {
    TNL::Matrices::SparseMatrix< double, Device > matrix { 5, 5, {
       { 0, 0, 1 },
@@ -50,7 +50,7 @@ void rowsReduction()
    /***
     * Compute matrix-vector product.
     */
-   matrix.rowsReduction( 0, matrix.getRows(), fetch, std::plus<>{}, keep, 0.0 );
+   matrix.reduceRows( 0, matrix.getRows(), fetch, std::plus<>{}, keep, 0.0 );
 
    std::cout << "The matrix reads as:" << std::endl << matrix << std::endl;
    std::cout << "The input vector is:" << x << std::endl;
@@ -60,11 +60,11 @@ void rowsReduction()
 int main( int argc, char* argv[] )
 {
    std::cout << "Rows reduction on host:" << std::endl;
-   rowsReduction< TNL::Devices::Host >();
+   reduceRows< TNL::Devices::Host >();
 
 #ifdef HAVE_CUDA
    std::cout << std::endl;
    std::cout << "Rows reduction on CUDA device:" << std::endl;
-   rowsReduction< TNL::Devices::Cuda >();
+   reduceRows< TNL::Devices::Cuda >();
 #endif
 }
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f2c6d541f679589d89e395777aba92992ae2d2f7
--- /dev/null
+++ b/Documentation/Tutorials/Matrices/SparseMatrixExample_reduceRows_vectorProduct.cu
@@ -0,0 +1 @@
+SparseMatrixExample_reduceRows_vectorProduct.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cu
deleted file mode 120000
index 1be7a26d87fe2fb8c12ab3bc22ee82333c7ed21d..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_rowsReduction_vectorProduct.cu
+++ /dev/null
@@ -1 +0,0 @@
-SparseMatrixExample_rowsReduction_vectorProduct.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cpp
deleted file mode 120000
index 1507393de5fbb5eff4088c8a08eafc6fadc72eab..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cu
deleted file mode 120000
index 2f13c04edeb3506ca1f0aa9b73754c00c7759068..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cpp
deleted file mode 120000
index 0f5e5d1dd5d2ab59ce265203779509479203ba54..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cu
deleted file mode 120000
index 120be66591ee38ebaba3c5fa18c76076de9d7df9..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cpp
deleted file mode 120000
index 5206fcc2e5d70994d67de439117a1acfa4dde872..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cu
deleted file mode 120000
index 9c5f7c0f52dc07f51cd25d62111ff1106a47b1f1..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setElements_map.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cpp b/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cpp
deleted file mode 120000
index 973b2f3a8aef18f13f2b0c7d4defbd88a1996291..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cu b/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cu
deleted file mode 120000
index ef674e0f0c9fc59ff2d375ba437e2b94594e5e4c..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixExample_setRowCapacities.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp b/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
index d9b668b20948d77a3fee38465467cbb76bba61d9..7af7de1e1031545ededa7e1ca29d333deaa9cc94 100644
--- a/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
+++ b/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
@@ -171,7 +171,7 @@ void forElements( const int gridSize, Matrix& matrix )
    auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, float& value, bool& compute ) mutable {
       const int i = rowIdx % gridSize;
       const int j = rowIdx / gridSize;
-      if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 && localIdx == 0 )
+      if( ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) && localIdx == 0 )
       {
          columnIdx = rowIdx;
          value = 1.0;
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cpp b/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cpp
deleted file mode 120000
index 0b861369e1d298d1a4a28d50d1d1a41186fd7f7c..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cu b/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cu
deleted file mode 120000
index 9a6e8304dda6e10d06cfd87fe8b1989b16dfa7ce..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/SparseMatrixViewExample_setElement.cu
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cu
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_Constructor_init_list_1.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_Constructor_init_list_1.cpp
deleted file mode 120000
index f074fa48bc889cf387995910e92895dcedbd8ec2..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_Constructor_init_list_1.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_rowsReduction.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_rowsReduction.cpp
deleted file mode 120000
index 5a8b79027874d4ee8a1d70e07228811e6cafc872..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_rowsReduction.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElement.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElement.cpp
deleted file mode 120000
index aa3443952bb40bb19e089d06357c8d433c5ea204..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElements.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElements.cpp
deleted file mode 120000
index 6a1a2e1ef872fe6664d627e73a895d9d3bf744c0..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixExample_setElements.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_getRow.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_getRow.cpp
deleted file mode 120000
index 960c717f44d6fbbbe82eb34363d840b6412ba6e6..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_getRow.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_setElement.cpp b/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_setElement.cpp
deleted file mode 120000
index 59094634e4b36a17746a4bedfe5bb2c3fb66b4f2..0000000000000000000000000000000000000000
--- a/Documentation/Tutorials/Matrices/TridiagonalMatrixViewExample_setElement.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Tutorials/Matrices/tutorial_Matrices.md b/Documentation/Tutorials/Matrices/tutorial_Matrices.md
index 925d748e81b3084149f3c9c3d7380048a8554036..5c60bece9e45b774ae332a531b1f5170ebd33d8b 100644
--- a/Documentation/Tutorials/Matrices/tutorial_Matrices.md
+++ b/Documentation/Tutorials/Matrices/tutorial_Matrices.md
@@ -2,6 +2,8 @@
 
 [TOC]
 
+TODO: Add description of forRows and sequentialForRows.
+
 ## Introduction
 
 TNL offers several types of matrices like dense (\ref TNL::Matrices::DenseMatrix), sparse (\ref TNL::Matrices::SparseMatrix), tridiagonal (\ref TNL::Matrices::TridiagonalMatrix), multidiagonal (\ref TNL::Matrices::MultidiagonalMatrix) and lambda matrices (\ref TNL::Matrices::LambdaMatrix). The sparse matrices can be symmetric to lower the memory requirements. The interfaces of given matrix types are designed to be as unified as possible to ensure that the user can easily switch between different matrix types while making no or only a little changes in the source code. All matrix types allows traversing all matrix elements and manipulate them using lambda functions as well as performing flexible reduction in matrix rows. The following text describes particular matrix types and their unified interface in details.
@@ -154,7 +156,7 @@ There are several ways how to create a new matrix:
 4. **Methods `setElement` and `addElement` called on the host and copy matrix on GPU** setting particular matrix elements by the methods `setElement` and `addElement` when the matrix is allocated on GPU can be time consuming for large matrices. Setting up the matrix on CPU using the same methods and copying it on GPU at once when the setup is finished can be significantly more efficient. A drawback is that we need to allocate temporarily whole matrix on CPU.
 5. **Methods `setElement` and `addElement` called from native device** allow to do efficient matrix elements setup even on devices (GPUs). In this case, the methods must be called from a GPU kernel or a lambda function combined with the parallel for (\ref TNL::Algorithms::ParallelFor). The user get very good performance even when manipulating matrix allocated on GPU. On the other hand, only data structures allocated on GPUs can be accessed from the kernel or lambda function. The matrix can be accessed in the GPU kernel or lambda function by means of [matrix view](#matrix_view) or the shared pointer (\ref TNL::Pointers::SharedPointer).
 6. **Method `getRow` combined with `ParallelFor`** is very similar to the previous one. The difference is that we first fetch helper object called *matrix row* which is linked to particular matrix row. Using methods of this object, one may change the matrix elements in given matrix row. An advantage is that the access to the matrix row is resolved only once for all elements in the row. In some more sophisticated sparse matrix formats, this can be nontrivial operation and this approach may slightly improve the performance. Another advantage for sparse matrices is that we access the matrix elements based on their *local index* ('localIdx', see [Indexing of nonzero matrix elements in sparse matrices](indexing_of_nonzero_matrix_elements_in_sparse_matrices)) in the row which is something like a rank of the nonzero element in the row. This is more efficient than addressing the matrix elements by the column indexes which requires searching in the matrix row. So this may significantly improve the performance of setup of sparse matrices. When it comes to dense matrices, there should not be great difference in performance compared to use of the methods `setElement` and `getElement`. Note that when the method is called from a GPU kernel or a lambda function, only data structures allocated on GPU can be accessed and the matrix must be made accessible by the means of matrix view.
-7. **Method `forElements`** this approach is very similar to the previous one but it avoids using `ParallelFor` and necessity of passing the matrix to GPU kernels by matrix view or shared pointers.
+7. **Methods `forRows` and `forElements`** this approach is very similar to the previous one but it avoids using `ParallelFor` and necessity of passing the matrix to GPU kernels by matrix view or shared pointers.
 
 The following table shows pros and cons of particular methods:
 
@@ -175,7 +177,7 @@ The following table shows pros and cons of particular methods:
 |                                         |           |             |                                                                       | Requires writing GPU kernel or lambda function.                       |
 |                                         |           |             |                                                                       | Allows accessing only data allocated on the same device/memory space. |
 |                                         |           |             |                                                                       | Use of matrix local indexes can be less intuitive.                    |
-| **forElements**                         | *****     | **          | Best efficiency for sparse matrices.                                  | Requires setting of row capacities.                                   |
+| **forRows**, **forElements**            | *****     | **          | Best efficiency for sparse matrices.                                  | Requires setting of row capacities.                                   |
 |                                         |           |             | Avoid use of matrix view or shared pointer in kernels/lambda function.| Requires writing GPU kernel or lambda function.                       |
 |                                         |           |             |                                                                       | Allows accessing only data allocated on the same device/memory space. |
 |                                         |           |             |                                                                       | Use of matrix local indexes is less intuitive.                        |
@@ -393,7 +395,25 @@ Here we show an example:
 
 \includelineno DenseMatrixViewExample_getRow.cpp
 
-Here we create the matrix on the line 10 and get the matrix view on the line 16. Next we use `ParallelFor` (\ref TNL::Algorithms::ParallelFor) (line 26) to iterate over the matrix rows and the lambda function `f` (lines 18-21) for each of them. In the lambda function, we first fetch the matrix row by means of the merhod `getRow` (\ref TNL::Matrices::DenseMatrixView::getRow) and next we set the matrix elements by using the method `setElement` of the matrix row (\ref TNL::Matrices::DenseMatrixRowView::setElement). For the compatibility with the sparse matrices, use the variant of `setElement` with the parameter `localIdx`. It has no effect here, it is only for compatibility of the interface.
+Here we create the matrix on the line 10 and get the matrix view on the line 16. Next we use `ParallelFor` (\ref TNL::Algorithms::ParallelFor) (line 31) to iterate over the matrix rows and call the lambda function `f` (lines 19-26) for each of them. In the lambda function, we first fetch the matrix row by means of the method `getRow` (\ref TNL::Matrices::DenseMatrixView::getRow) and next we set the matrix elements by using the method `setElement` of the matrix row (\ref TNL::Matrices::DenseMatrixRowView::setElement). For the compatibility with the sparse matrices, use the variant of `setElement` with the parameter `localIdx`. It has no effect here, it is only for compatibility of the interface.
+
+The result looks as follows:
+
+\include DenseMatrixViewExample_getRow.out
+
+#### Method `forRows`
+
+This method iterates in parallel over all matrix rows. In fact, it combines \ref TNL::Algorithms::ParallelFor and the \ref TNL::Matrices::DenseMatrix::getRow method in one. See the following example. It is even a bit simpler compared to the previous one:
+
+\includelineno DenseMatrixExample_forRows.cpp
+
+The lambda function `f`, which is called for each matrix row (lines 18-25), has to accept the parameter `row` with type `RowView`. This type is defined inside each TNL matrix and in the case of the dense matrix, it is \ref TNL::Matrices::DenseMatrixRowView. We use the method \ref TNL::Matrices::DenseMatrixRowView::getRowIndex to get the index of the matrix row being currently processed and the method \ref TNL::Matrices::DenseMatrixRowView::setElement which sets the value of the element with given column index (the first parameter).
+
+Next, on the lines 32-38, we call another lambda function which first finds the largest element in each row (lines 33-35) and then divides the matrix row by its value (lines 36-37).
+
+The result looks as follows:
+
+\include DenseMatrixExample_forRows.out
 
 #### Method `forElements`
 
@@ -416,7 +436,7 @@ The result looks as follows:
 
 [Sparse matrices](https://en.wikipedia.org/wiki/Sparse_matrix) are extremely important in a lot of numerical algorithms. They are used at situations when we need to operate with matrices having majority of the matrix elements equal to zero. In this case, only the non-zero matrix elements are stored with possibly some *padding zeros* used for memory alignment. This is necessary mainly on GPUs. See the [Overview of matrix types](#overview_of_matrix_types) for the differences in memory requirements.
 
-Major disadvantage of sparse matrices is that there are a lot of different formats for their storage in memory. Though [CSR (Compressed Sparse Row)](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)) format is the most popular of all, especially for GPUs, there are many other formats. Often their performance differ significantly for various matrices. So it is a good idea to test several sparse matrix formats if you want to get the best performance. In TNL, there is one templated class \ref TNL::Matrices::SparseMatrix representing general sparse matrices. The change of underlying matrix format can be done just by changing one template parameter. The list of the template paramaters is as follows:
+Major disadvantage of sparse matrices is that there are a lot of different formats for their storage in memory. Though [CSR (Compressed Sparse Row)](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)) format is the most popular of all, especially for GPUs, there are many other formats. Often their performance differ significantly for various matrices. So it is a good idea to test several sparse matrix formats if you want to get the best performance. In TNL, there is one templated class \ref TNL::Matrices::SparseMatrix representing general sparse matrices. The change of underlying matrix format can be done just by changing one template parameter. The list of the template parameters is as follows:
 
 * `Real` is type if the matrix elements. It is `double` by default.
 * `Device` is a device where the matrix is allocated. Currently it can be either \ref TNL::Devices::Host for CPU or \ref TNL::Devices::Cuda for CUDA supporting GPUs. It is \ref TNL::Devices::Host by default.
@@ -475,7 +495,6 @@ The result of both examples looks as follows:
 
 \include SparseMatrixExample_Constructor_init_list_1.out
 
-
 #### Initializer list
 
 Small matrices can be initialized by a constructor with an [initializer list](https://en.cppreference.com/w/cpp/utility/initializer_list). We assume having the following sparse matrix
@@ -554,9 +573,9 @@ The result looks as follows:
 
 More efficient method, especially for GPUs, is to combine `getRow` (\ref TNL::Matrices::SparseMatrix::getRow) method with `ParallelFor` (\ref TNL::Algorithms::ParallelFor) and lambda function as the following example demonstrates:
 
-\includelineno SparseMatrixViewExample_getRow.cpp
+\includelineno SparseMatrixExample_getRow.cpp
 
-On the line 11, we create small matrix having five rows (number of rows is given by the size of the [initializer list](https://en.cppreference.com/w/cpp/utility/initializer_list) ) and columns (number of columns is given by the second parameter) and we set each row capacity to one (particular elements of the initializer list). On the line 22, we call `ParallelFor` (\ref TNL::Algorithms::ParallelFor) to iterate over all matrix elements. Each row is processed by the lambda function `f` (lines 14-17). In the lambda function, we first fetch a sparse matrix row (\ref TNL::Matrices::SparseMatrixRowView) which serves for accessing particular matrix rows. This object has a method `setElement` (\ref TNL::Matrices::SparseMatrixRowView::setElement) accepting three parameters:
+On the line 21, we create small matrix having five rows (number of rows is given by the size of the [initializer list](https://en.cppreference.com/w/cpp/utility/initializer_list) ) and columns (number of columns is given by the second parameter) and we set each row capacity to one or three (particular elements of the initializer list). On the line 41, we call `ParallelFor` (\ref TNL::Algorithms::ParallelFor) to iterate over all matrix rows. Each row is processed by the lambda function `f` (lines 24-36). In the lambda function, we first fetch a sparse matrix row (\ref TNL::Matrices::SparseMatrixRowView) (line 25) which serves for accessing particular matrix elements in the matrix row. This object has a method `setElement` (\ref TNL::Matrices::SparseMatrixRowView::setElement) accepting three parameters:
 
 1. `localIdx` is a rank of the nonzero element in given matrix row.
 2. `columnIdx` is the new column index of the matrix element.
@@ -564,7 +583,25 @@ On the line 11, we create small matrix having five rows (number of rows is given
 
 The result looks as follows:
 
-\include SparseMatrixViewExample_getRow.out
+\include SparseMatrixExample_getRow.out
+
+#### Method `forRows`
+
+The method `forRows` (\ref TNL::Matrices::SparseMatrix::forRows) calls the method `getRow` (\ref TNL::Matrices::SparseMatrix::getRow) in parallel. See the following example which has the same effect as the previous one but it is slightly simpler:
+
+\includelineno SparseMatrixExample_forRows.cpp
+
+The differences are:
+
+1. We do not need to get the matrix view as we did in the previous example.
+2. We call the method `forAllRows` (\ref TNL::Matrices::SparseMatrix::forAllRows) instead of `ParallelFor` (\ref TNL::Algorithms::ParallelFor) which is simpler since we do not have to state the device type explicitly. The method `forAllRows` calls the method `forRows` for all matrix rows so we do not have to state the interval of matrix rows explicitly either.
+3. The lambda function `f` (lines 27-39) accepts one parameter `row` of the type `RowView` (\ref TNL::Matrices::SparseMatrix::RowView which is \ref TNL::Matrices::SparseMatrixRowView) instead of the index of the matrix row. Therefore we do not need to call the method `getRow` (\ref TNL::Matrices::SparseMatrix::getRow). On the other hand, we need the method `getRowIndex` (\ref TNL::Matrices::SparseMatrixRowView::getRowIndex) to get the index of the matrix row (line 28).
+
+On the lines 46-52, we call a lambda function which computes the sum of all elements in a row (lines 47-49) and then divides the row by the `sum` (lines 50-51).
+
+The result looks as follows:
+
+\include SparseMatrixExample_forRows.out
 
 #### Method `forElements`
 
@@ -816,16 +853,34 @@ The result looks as follows:
 
 #### Method `getRow`
 
- A bit different way how to do the same is the use of tridiagonal matrix view and the method `getRow` (\ref TNL::Matrices::TridiagonalMatrixView::getRow) as the following example demonstrates:
+A slightly different way of setting up the matrix is to use the tridiagonal matrix view and the method `getRow` (\ref TNL::Matrices::TridiagonalMatrixView::getRow) as the following example demonstrates:
 
 \includelineno TridiagonalMatrixViewExample_getRow.cpp
 
-We create a matrix with the same size (line 10-15). Next, we fetch the tridiagonal matrix view (ef TNL::Matrices::TridiagonalMatrixView ,line 16) which we use in the lambda function for matrix elements modification (lines 18-26). Inside the lambda function, we first get a matrix row by calling the method `getRow` (\ref TNL::Matrices::TridiagonalMatrixView::getRow) using which we can access the matrix elements (lines 21-25). We would like to stress that the method `setElement` addresses the matrix elements with the `localIdx` parameter which is a rank of the nonzero element in the matrix row - see [Indexing of nonzero matrix elements in sparse matrices](#indexing-of-nonzero-matrix-elements-in-sparse-matrices). The lambda function is called by the `ParallelFor` (\ref TNL::Algorithms::ParallelFor).
+We create a matrix with the same size (lines 22-27). Next, we fetch the tridiagonal matrix view (\ref TNL::Matrices::TridiagonalMatrixView, line 28) which we use in the lambda function for matrix elements modification (lines 30-38). Inside the lambda function, we first get a matrix row by calling the method `getRow` (\ref TNL::Matrices::TridiagonalMatrixView::getRow) using which we can access the matrix elements (lines 33-37). We would like to stress that the method `setElement` addresses the matrix elements with the `localIdx` parameter which is a rank of the nonzero element in the matrix row - see [Indexing of nonzero matrix elements in sparse matrices](#indexing-of-nonzero-matrix-elements-in-sparse-matrices). The lambda function is called by the `ParallelFor` (\ref TNL::Algorithms::ParallelFor).
 
 The result looks as follows:
 
 \include TridiagonalMatrixViewExample_getRow.out
 
+#### Method `forRows`
+
+As in the case of other matrix types, the method `forRows` (\ref TNL::Matrices::TridiagonalMatrix::forRows) calls the method `getRow` (\ref TNL::Matrices::TridiagonalMatrix::getRow) in parallel. It is demonstrated by the following example which we may directly compare with the previous one:
+
+\includelineno TridiagonalMatrixExample_forRows.cpp
+
+The differences are:
+
+1. We do not need to get the matrix view as we did in the previous example.
+2. We call the method `forAllRows` (\ref TNL::Matrices::TridiagonalMatrix::forAllRows) (line 33) instead of `ParallelFor` (\ref TNL::Algorithms::ParallelFor) which is simpler since we do not have to state the device type explicitly. The method `forAllRows` calls the method `forRows` for all matrix rows so we do not have to state the interval of matrix rows explicitly either.
+3. The lambda function `f` (lines 25-31) accepts one parameter `row` of the type `RowView` (\ref TNL::Matrices::TridiagonalMatrix::RowView which is \ref TNL::Matrices::TridiagonalMatrixRowView) instead of the index of the matrix row. Therefore we do not need to call the method `getRow` (\ref TNL::Matrices::TridiagonalMatrix::getRow). On the other hand, we need the method `getRowIndex` (\ref TNL::Matrices::TridiagonalMatrixRowView::getRowIndex) to get the index of the matrix row (line 24).
+
+Next, we compute the sum of absolute values of matrix elements in each row and store it in a vector (lines 39-46). First we create the vector `sum_vector` for storing the sums (line 39) and get a vector view `sum_view` to get access to the vector from a lambda function. On the lines 41-46, we call a lambda function for each matrix row which iterates over all matrix elements and sums their absolute values. Finally we store the result to the output vector (line 45).
+
+The result looks as follows:
+
+\include TridiagonalMatrixExample_forRows.out
+
 #### Method `forElements`
 
 Finally, even a bit more simple way of matrix elements manipulation with the method `forElements` (\ref TNL::Matrices::TridiagonalMatrix::forElements) is demonstrated in the following example:
@@ -1082,6 +1137,20 @@ We use `ParallelFor2D` (\ref TNL::Algorithms::ParallelFor2D) to iterate over all
 
 \include MultidiagonalMatrixExample_Constructor.out
 
+### Method `forRows`
+
+As in the case of other matrix types, the method `forRows` (\ref TNL::Matrices::MultidiagonalMatrix::forRows) calls the method `getRow` (\ref TNL::Matrices::MultidiagonalMatrix::getRow) in parallel. It is demonstrated by the following example:
+
+\includelineno MultidiagonalMatrixExample_forRows.cpp
+
+We call the method `forAllRows` (\ref TNL::Matrices::MultidiagonalMatrix::forAllRows) (line 36) instead of `ParallelFor` (\ref TNL::Algorithms::ParallelFor) which is simpler since we do not have to state the device type explicitly. The method `forAllRows` calls the method `forRows` for all matrix rows, so we do not have to state the interval of matrix rows explicitly either. The lambda function `f` (lines 28-35) accepts one parameter `row` of the type `RowView` (\ref TNL::Matrices::MultidiagonalMatrix::RowView which is \ref TNL::Matrices::MultidiagonalMatrixRowView). At the beginning of the lambda function, we call the method `getRowIndex` (\ref TNL::Matrices::MultidiagonalMatrixRowView::getRowIndex) to get the index of the matrix row (line 29).
+
+Next, we compute the sum of absolute values of matrix elements in each row and store it in a vector (lines 39-46). First, we create the vector `sum_vector` for storing the sums (line 39) and get a vector view `sum_view` to get access to the vector from a lambda function. On lines 41-46, we call a lambda function for each matrix row which iterates over all matrix elements and sums their absolute values. Finally, we store the result in the output vector (line 45).
+
+The result looks as follows:
+
+\include MultidiagonalMatrixExample_forRows.out
+
 #### Method `forElements`
 
 Similar and even a bit simpler way of setting the matrix elements is offered by the method `forElements` (\ref TNL::Matrices::MultidiagonalMatrix::forElements, \ref TNL::Matrices::MultidiagonalMatrixView::forElements) as demonstrated in the following example:
@@ -1151,6 +1220,24 @@ The result looks as follows:
 
 \include LambdaMatrixExample_Constructor.out
 
+#### Method `forRows`
+
+Method `forRows` (\ref TNL::Matrices::LambdaMatrix::forRows, \ref TNL::Matrices::LambdaMatrix::forAllRows) iterates in parallel over all matrix rows. In the case of lambda matrices, it cannot be used for changing the matrix elements since they are defined by the lambda functions and cannot be modified. In the following example, we show how to use this method to copy the values of the matrix elements to a dense matrix:
+
+\includelineno LambdaMatrixExample_forRows.cpp
+
+We start with the lambda functions (lines 17-61) defining the elements of the lambda matrix. Next, we create the lambda matrix `matrix` (lines 62-64) and the dense matrix `denseMatrix` (lines 67-68) together with the dense matrix view (line 69). The lambda function `f` (lines 70-74) serves for copying matrix elements from the lambda matrix to the dense matrix. The process of matrix elements copying is started by calling the method `forAllRows` (\ref TNL::Matrices::LambdaMatrix::forRows, \ref TNL::Matrices::LambdaMatrix::forAllRows) (line 75).
+
+Note, however, that use of `forElements` method (\ref TNL::Matrices::LambdaMatrix::forElements) would be more convenient.
+
+Next, we compute the sum of absolute values of matrix elements in each row and store it in a vector (lines 83-90). First, we create the vector `sum_vector` for storing the sums (line 83) and get a vector view `sum_view` to get access to the vector from a lambda function. On lines 85-90, we call a lambda function for each matrix row which iterates over all matrix elements and sums their absolute values. Finally, we store the result in the output vector (line 92).
+
+
+
+The result looks as follows:
+
+\include LambdaMatrixExample_forRows.out
+
 #### Method `forElements`
 
 The lambda matrix has the same interface as other matrix types except of the method `getRow`. The following example demonstrates the use of the method `forElements` (\ref TNL::Matrices::LambdaMatrix::forElements) to copy the lambda matrix into the dense matrix:
@@ -1181,8 +1268,8 @@ TODO: Write documentation on distributed matrices.
 
 ## Flexible reduction in matrix rows
 
-Flexible reduction in matrix rows is a powerful tool for many different matrix operations. It is represented by the method `rowsReduction` (\ref TNL::Matrices::DenseMatrix::rowsReduction,
-\ref TNL::Matrices::SparseMatrix::rowsReduction, \ref TNL::Matrices::TridiagonalMatrix::rowsReduction, \ref TNL::Matrices::MultidiagonalMatrix::rowsReduction, \ref TNL::Matrices::LambdaMatrix::rowsReduction) and similar to the method `forElements` it iterates over particular matrix rows. However, it performs *flexible paralell reduction* in addition. For example, the matrix-vector product can be seen as a reduction of products of matrix elements with the input vector in particular matrix rows. The first element of the result vector ios obtained as:
+Flexible reduction in matrix rows is a powerful tool for many different matrix operations. It is represented by the method `reduceRows` (\ref TNL::Matrices::DenseMatrix::reduceRows,
+\ref TNL::Matrices::SparseMatrix::reduceRows, \ref TNL::Matrices::TridiagonalMatrix::reduceRows, \ref TNL::Matrices::MultidiagonalMatrix::reduceRows, \ref TNL::Matrices::LambdaMatrix::reduceRows) and, similarly to the method `forElements`, it iterates over particular matrix rows. However, it performs *flexible parallel reduction* in addition. For example, the matrix-vector product can be seen as a reduction of products of matrix elements with the input vector in particular matrix rows. The first element of the result vector is obtained as:
 
 \f[
 y_1 = a_{11} x_1 + a_{12} x_2 + \ldots + a_{1n} x_n = \sum_{j=1}^n a_{1j}x_j
@@ -1236,7 +1323,7 @@ The meaning of the particular parameters is as follows:
 1. `rowIdx` is an index of the matrix row related to given result of flexible reduction.
 2. `value`is the result of the flexible reduction in given matrix row.
 
-The method `rowsReduction` (\ref TNL::Matrices::DenseMatrix::rowsReduction, \ref TNL::Matrices::SparseMatrix::rowsReduction, \ref TNL::Matrices::TridiagonalMatrix::rowsReduction, \ref TNL::Matrices::MultidiagonalMatrix::rowsReduction, \ref TNL::Matrices::LambdaMatrix::rowsReduction) accepts the following arguments:
+The method `reduceRows` (\ref TNL::Matrices::DenseMatrix::reduceRows, \ref TNL::Matrices::SparseMatrix::reduceRows, \ref TNL::Matrices::TridiagonalMatrix::reduceRows, \ref TNL::Matrices::MultidiagonalMatrix::reduceRows, \ref TNL::Matrices::LambdaMatrix::reduceRows) accepts the following arguments:
 
 1. `begin` is the beginning of the matrix rows range on which the reduction will be performed.
 2. `end` is the end of the matrix rows range on which the reduction will be performed. The last matrix row which is going to be processed has index `end-1`.
@@ -1255,7 +1342,7 @@ The following example demonstrates implementation of the dense matrix-vector pro
    y_i = \sum_{j=0}^{columns - 1} a_{ij} x_j \text{ for } i = 0, \ldots, rows-1.
 \f]
 
-\includelineno DenseMatrixExample_rowsReduction_vectorProduct.cpp
+\includelineno DenseMatrixExample_reduceRows_vectorProduct.cpp
 
 We set the following lambda functions:
 
@@ -1265,7 +1352,7 @@ We set the following lambda functions:
 
 The result looks as:
 
-\include DenseMatrixExample_rowsReduction_vectorProduct.out
+\include DenseMatrixExample_reduceRows_vectorProduct.out
 
 We will show one more example which is a computation of maximal absolute value in each matrix row. The results will be stored in a vector:
 
@@ -1275,7 +1362,7 @@ y_i = \max_{j=1,\ldots,n} |a_{ij}|.
 
 See the following example:
 
-\includelineno DenseMatrixExample_rowsReduction_maxNorm.cpp
+\includelineno DenseMatrixExample_reduceRows_maxNorm.cpp
 
 The lambda functions rare:
 
@@ -1285,13 +1372,13 @@ The lambda functions rare:
 
 Note, that the idempotent value for the reduction is \ref std::numeric_limits< double >::lowest. Of course, if we compute the maximum of all output vector elements, we get some kind of maximal matrix norm. The output looks as:
 
-\include DenseMatrixExample_rowsReduction_maxNorm.out
+\include DenseMatrixExample_reduceRows_maxNorm.out
 
 ### Sparse matrices example
 
 The following example demonstrates sparse matrix-vector product:
 
-\includelineno SparseMatrixExample_rowsReduction_vectorProduct.cpp
+\includelineno SparseMatrixExample_reduceRows_vectorProduct.cpp
 
 On the lines 11-16 we set the following matrix:
 
@@ -1309,7 +1396,7 @@ On the lines 11-16 we set the following matrix:
 
 The lambda functions on the lines 39-48 are the same as in the example with the dense matrix. The result looks as follows:
 
-\include SparseMatrixExample_rowsReduction_vectorProduct.out
+\include SparseMatrixExample_reduceRows_vectorProduct.out
 
 ### Tridiagonal matrices example
 
@@ -1329,7 +1416,7 @@ In this example, we will compute maximal absolute value in each row of the follo
 
 The source code reads as follows:
 
-\includelineno TridiagonalMatrixExample_rowsReduction.cpp
+\includelineno TridiagonalMatrixExample_reduceRows.cpp
 
 Here we first set the tridiagonal matrix (lines 10-27). Next we allocate the vector `rowMax` where we will store the results (line 32). The lambda function are:
 
@@ -1339,7 +1426,7 @@ Here we first set the tridiagonal matrix (lines 10-27). Next we allocate the vec
 
 Note, that the idempotent value for the reduction is \ref std::numeric_limits< double >::lowest. The results looks as follows:
 
-\include TridiagonalMatrixExample_rowsReduction.out
+\include TridiagonalMatrixExample_reduceRows.out
 
 ### Multidiagonal matrices example
 
@@ -1363,15 +1450,15 @@ We first create vector `rowMax` into which we will store the results and fetch i
 * `reduce` (lines 51-53) returns maximum value of the two input values `a` and `b`.
 * `keep` (line 58-60) stores the input `value` at the corresponding position, given by the row index `rowIdx`, in the output vector view `rowMaxView`.
 
-Finally, we call the method `rowsReduction` (\ref TNL::Matrices::MultidiagonalMatrix::rowsReduction) with parameters telling the interval of rows to be processed (the first and second parameter), the lambda functions `fetch`, `reduce` and `keep`, and the idempotent element for the reduction operation which is the lowest number of given type (\ref std::numeric_limits< double >::lowest ). The result looks as follows:
+Finally, we call the method `reduceRows` (\ref TNL::Matrices::MultidiagonalMatrix::reduceRows) with parameters telling the interval of rows to be processed (the first and second parameter), the lambda functions `fetch`, `reduce` and `keep`, and the idempotent element for the reduction operation which is the lowest number of given type (\ref std::numeric_limits< double >::lowest ). The result looks as follows:
 
-\include MultidiagonalMatrixExample_rowsReduction.out
+\include MultidiagonalMatrixExample_reduceRows.out
 
 ### Lambda matrices example
 
 The reduction of matrix rows is available for the lambda matrices as well. See the following example:
 
-\includelineno LambdaMatrixExample_rowsReduction.cpp
+\includelineno LambdaMatrixExample_reduceRows.cpp
 
 On the lines 14-21, we create the lower triangular lambda matrix which looks as follows:
 
@@ -1395,7 +1482,7 @@ We want to compute maximal absolute value of matrix elements in each row. For th
 
 Note that the interface of the lambda functions is the same as for other matrix types. The result looks as follows:
 
-\include LambdaMatrixExample_rowsReduction.out
+\include LambdaMatrixExample_reduceRows.out
 
 ## Matrix-vector product
 
diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
index 3ef168a2514120adeed63b8eadd5dbe4fb5e4176..931d07d2b532bdb3bce834a557377dcf98220296 100644
--- a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp
@@ -29,7 +29,7 @@ int main( int argc, char* argv[] )
 {
    Vector< double, Devices::Host > host_u( 10 ), host_v( 10 );
    host_u = 1.0;
-   host_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
+   host_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
    std::cout << "host_u = " << host_u << std::endl;
    std::cout << "host_v = " << host_v << std::endl;
    std::cout << "Comparison of host_u and host_v is: " << ( comparison( host_u, host_v ) ? "'true'" : "'false'" ) << "." << std::endl;
@@ -37,7 +37,7 @@ int main( int argc, char* argv[] )
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_u( 10 ), cuda_v( 10 );
    cuda_u = 1.0;
-   cuda_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
+   cuda_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
    std::cout << "cuda_u = " << cuda_u << std::endl;
    std::cout << "cuda_v = " << cuda_v << std::endl;
    std::cout << "Comparison of cuda_u and cuda_v is: " << ( comparison( cuda_u, cuda_v ) ? "'true'" : "'false'" ) << "." << std::endl;
diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
index eeccc728fb4ea23d1e3a95f22f76c70f1773fddb..90a069c8a9286b1bf7a6262d7ead7d60f2e7ae50 100644
--- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp
@@ -20,7 +20,7 @@ double mapReduce( Vector< double, Device >& u )
 int main( int argc, char* argv[] )
 {
    Vector< double, Devices::Host > host_u( 10 );
-   host_u.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = sin( ( double ) i ); } );
+   host_u.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = sin( ( double ) i ); } );
    double result = mapReduce( host_u );
    std::cout << "host_u = " << host_u << std::endl;
    std::cout << "Sum of the positive numbers is:" << result << std::endl;
diff --git a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
index 1b31eb5e5395fe8c5d4f4387ccb7b38c74d40bb2..8d503cbd4a7ed079dfff6b1d81511dc3ebc357bc 100644
--- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp
@@ -19,12 +19,12 @@ double maximumNorm( const Vector< double, Device >& v )
 int main( int argc, char* argv[] )
 {
    Vector< double, Devices::Host > host_v( 10 );
-   host_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
+   host_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
    std::cout << "host_v = " << host_v << std::endl;
    std::cout << "The maximum norm of the host vector elements is " << maximumNorm( host_v ) << "." << std::endl;
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_v( 10 );
-   cuda_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
+   cuda_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
    std::cout << "cuda_v = " << cuda_v << std::endl;
    std::cout << "The maximum norm of the CUDA vector elements is " << maximumNorm( cuda_v ) << "." << std::endl;
 #endif
diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
index 689d8b599c15a011d64624b2688004c480aa1e72..18ac3363bf4632b07f35404d5d887c6ed7637e9c 100644
--- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp
@@ -28,13 +28,13 @@ maximumNorm( const Vector< double, Device >& v )
 int main( int argc, char* argv[] )
 {
    Vector< double, Devices::Host > host_v( 10 );
-   host_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
+   host_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
    std::cout << "host_v = " << host_v << std::endl;
    auto maxNormHost = maximumNorm( host_v );
    std::cout << "The maximum norm of the host vector elements is " <<  maxNormHost.first << " at position " << maxNormHost.second << "." << std::endl;
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_v( 10 );
-   cuda_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
+   cuda_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = i - 7; } );
    std::cout << "cuda_v = " << cuda_v << std::endl;
    auto maxNormCuda = maximumNorm( cuda_v );
    std::cout << "The maximum norm of the device vector elements is " <<  maxNormCuda.first << " at position " << maxNormCuda.second << "." << std::endl;
diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
index 5a63b460b87cced54cfad32adadd1d7707749fa7..680075f8426d8d7fe35292cbc18cc818cfbeb6d9 100644
--- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
+++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp
@@ -28,7 +28,7 @@ int main( int argc, char* argv[] )
     */
    Vector< double, Devices::Host > host_u( 10 ), host_v( 10 );
    host_u = 1.0;
-   host_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
+   host_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
    std::cout << "host_u = " << host_u << std::endl;
    std::cout << "host_v = " << host_v << std::endl;
    std::cout << "The scalar product ( host_u, host_v ) is " << scalarProduct( host_u, host_v ) << "." << std::endl;
@@ -40,7 +40,7 @@ int main( int argc, char* argv[] )
 #ifdef HAVE_CUDA
    Vector< double, Devices::Cuda > cuda_u( 10 ), cuda_v( 10 );
    cuda_u = 1.0;
-   cuda_v.forEachElement( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
+   cuda_v.forAllElements( [] __cuda_callable__ ( int i, double& value ) { value = 2 * ( i % 2 ) - 1; } );
    std::cout << "cuda_u = " << cuda_u << std::endl;
    std::cout << "cuda_v = " << cuda_v << std::endl;
    std::cout << "The scalar product ( cuda_u, cuda_v ) is " << scalarProduct( cuda_u, cuda_v ) << "." << std::endl;
diff --git a/Documentation/Tutorials/Vectors/Expressions.cpp b/Documentation/Tutorials/Vectors/Expressions.cpp
index 5ccad7c6da13139fc56709b3698e21ad31b2e721..b41d2188eadf3ccbdd8317634669f962d47bf5a9 100644
--- a/Documentation/Tutorials/Vectors/Expressions.cpp
+++ b/Documentation/Tutorials/Vectors/Expressions.cpp
@@ -20,7 +20,7 @@ void expressions()
    ViewType a = a_v.getView();
    ViewType b = b_v.getView();
    ViewType c = c_v.getView();
-   a.forEachElement( [] __cuda_callable__ ( int i, RealType& value ) { value = 3.14 * ( i - 5.0 ) / 5.0; } );
+   a.forAllElements( [] __cuda_callable__ ( int i, RealType& value ) { value = 3.14 * ( i - 5.0 ) / 5.0; } );
    b = a * a;
    c = 3 * a + sign( a ) * sin( a );
    std::cout << "a = " << a << std::endl;
diff --git a/Documentation/Tutorials/Vectors/Reduction.cpp b/Documentation/Tutorials/Vectors/Reduction.cpp
index 5646b48690900268bf94133bd9f2510c11c3caf0..b0034f6d53819b139fbd5bcca801a078e9e31159 100644
--- a/Documentation/Tutorials/Vectors/Reduction.cpp
+++ b/Documentation/Tutorials/Vectors/Reduction.cpp
@@ -20,8 +20,8 @@ void expressions()
    ViewType a = a_v.getView();
    ViewType b = b_v.getView();
    ViewType c = c_v.getView();
-   a.forEachElement( [] __cuda_callable__ ( int i, RealType& value ) { value = i; } );
-   b.forEachElement( [] __cuda_callable__ ( int i, RealType& value ) { value = i - 5.0; } );
+   a.forAllElements( [] __cuda_callable__ ( int i, RealType& value ) { value = i; } );
+   b.forAllElements( [] __cuda_callable__ ( int i, RealType& value ) { value = i - 5.0; } );
    c = -5;
 
    std::cout << "a = " << a << std::endl;
diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Sparse.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Sparse.h
index 2e50843c2e2b3e28fc6ceee2fb36d3ec81b7daa1..f65527b09f49da90fd89503fdc375a6680ba8f9d 100644
--- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Sparse.h
+++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Sparse.h
@@ -29,7 +29,7 @@ class Sparse : public TNL::Matrices::Matrix< Real, Device, Index >
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef typename TNL::Matrices::Matrix< RealType, DeviceType, IndexType >::ValuesVectorType ValuesVector;
+   typedef typename TNL::Matrices::Matrix< RealType, DeviceType, IndexType >::ValuesType ValuesVector;
    typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector;
    typedef TNL::Matrices::Matrix< Real, Device, Index > BaseType;
    typedef SparseRow< RealType, IndexType > MatrixRow;
diff --git a/src/TNL/Algorithms/Segments/BiEllpack.h b/src/TNL/Algorithms/Segments/BiEllpack.h
index c32dc1f22e97f20924b9ed29d04ad477abd0e87c..3a5a7c2026a8efd69d54cfd472807a039f85d43e 100644
--- a/src/TNL/Algorithms/Segments/BiEllpack.h
+++ b/src/TNL/Algorithms/Segments/BiEllpack.h
@@ -15,162 +15,173 @@
 #include <TNL/Algorithms/Segments/BiEllpackView.h>
 #include <TNL/Algorithms/Segments/SegmentView.h>
 
-namespace TNL {
-   namespace Algorithms {
-      namespace Segments {
-
-template< typename Device,
-          typename Index,
-          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >,
-          ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization(),
-          int WarpSize = 32 >
-class BiEllpack
+namespace TNL
 {
-   public:
+   namespace Algorithms
+   {
+      namespace Segments
+      {
 
-      using DeviceType = Device;
-      using IndexType = std::remove_const_t< Index >;
-      using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >;
-      static constexpr ElementsOrganization getOrganization() { return Organization; }
-      using ViewType = BiEllpackView< Device, Index, Organization >;
-      template< typename Device_, typename Index_ >
-      using ViewTemplate = BiEllpackView< Device_, Index_, Organization >;
-      using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, Organization >;
-      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >;
+         template <typename Device,
+                   typename Index,
+                   typename IndexAllocator = typename Allocators::Default<Device>::template Allocator<Index>,
+                   ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization<Device>::getOrganization(),
+                   int WarpSize = 32>
+         class BiEllpack
+         {
+         public:
+            using DeviceType = Device;
+            using IndexType = std::remove_const_t<Index>;
+            using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator>;
+            static constexpr ElementsOrganization getOrganization() { return Organization; }
+            using ViewType = BiEllpackView< Device, Index, Organization, WarpSize >;
+            template <typename Device_, typename Index_>
+            using ViewTemplate = BiEllpackView<Device_, Index_, Organization, WarpSize >;
+            using ConstViewType = typename ViewType::ConstViewType;
+            using SegmentViewType = typename ViewType::SegmentViewType;
 
-      static constexpr bool havePadding() { return true; };
+            static constexpr bool havePadding() { return true; };
 
-      BiEllpack() = default;
+            BiEllpack() = default;
 
-      BiEllpack( const Containers::Vector< IndexType, DeviceType, IndexType >& sizes );
+            BiEllpack(const Containers::Vector<IndexType, DeviceType, IndexType> &sizes);
 
-      BiEllpack( const BiEllpack& segments );
+            BiEllpack(const BiEllpack &segments);
 
-      BiEllpack( const BiEllpack&& segments );
+            BiEllpack(const BiEllpack &&segments);
 
-      static String getSerializationType();
+            static String getSerializationType();
 
-      static String getSegmentsType();
+            static String getSegmentsType();
 
-      ViewType getView();
+            ViewType getView();
 
-      const ConstViewType getConstView() const;
+            const ConstViewType getConstView() const;
 
-      /**
+            /**
        * \brief Number of segments.
        */
-      __cuda_callable__
-      IndexType getSegmentsCount() const;
+            __cuda_callable__
+                IndexType
+                getSegmentsCount() const;
 
-      /**
+            /**
        * \brief Set sizes of particular segments.
        */
-      template< typename SizesHolder = OffsetsHolder >
-      void setSegmentsSizes( const SizesHolder& sizes );
+            template <typename SizesHolder = OffsetsHolder>
+            void setSegmentsSizes(const SizesHolder &sizes);
 
-      void reset();
+            void reset();
 
-      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+            IndexType getSegmentSize(const IndexType segmentIdx) const;
 
-      /**
+            /**
        * \brief Number segments.
        */
-      __cuda_callable__
-      IndexType getSize() const;
+            __cuda_callable__
+                IndexType
+                getSize() const;
 
-      __cuda_callable__
-      IndexType getStorageSize() const;
+            __cuda_callable__
+                IndexType
+                getStorageSize() const;
 
-      __cuda_callable__
-      IndexType getGlobalIndex( const IndexType segmentIdx, const IndexType localIdx ) const;
+            __cuda_callable__
+                IndexType
+                getGlobalIndex(const IndexType segmentIdx, const IndexType localIdx) const;
 
-      __cuda_callable__
-      SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+            __cuda_callable__
+                SegmentViewType
+                getSegmentView(const IndexType segmentIdx) const;
 
-      /***
+            /***
        * \brief Go over all segments and for each segment element call
        * function 'f' with arguments 'args'. The return type of 'f' is bool.
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+            template <typename Function>
+            void forElements(IndexType first, IndexType last, Function &&f) const;
+
+            template <typename Function>
+            void forAllElements(Function &&f) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+            template <typename Function>
+            void forSegments(IndexType begin, IndexType end, Function &&f) const;
 
+            template <typename Function>
+            void forEachSegment(Function &&f) const;
 
-      /***
+            /***
        * \brief Go over all segments and perform a reduction in each of them.
        */
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+            template <typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args>
+            void segmentsReduction(IndexType first, IndexType last, Fetch &fetch, const Reduction &reduction, ResultKeeper &keeper, const Real &zero, Args... args) const;
 
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
-      void allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+            template <typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args>
+            void allReduction(Fetch &fetch, const Reduction &reduction, ResultKeeper &keeper, const Real &zero, Args... args) const;
 
-      BiEllpack& operator=( const BiEllpack& source ) = default;
+            BiEllpack &operator=(const BiEllpack &source) = default;
 
-      template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ >
-      BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, Organization_, WarpSize >& source );
+            template <typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_>
+            BiEllpack &operator=(const BiEllpack<Device_, Index_, IndexAllocator_, Organization_, WarpSize> &source);
 
-      void save( File& file ) const;
+            void save(File &file) const;
 
-      void load( File& file );
+            void load(File &file);
 
-      void printStructure( std::ostream& str ) const;
+            void printStructure(std::ostream &str) const;
 
-      // TODO: nvcc needs this public because of lambda function used inside
-      template< typename SizesHolder = OffsetsHolder >
-      void performRowBubbleSort( const SizesHolder& segmentsSize );
+            // TODO: nvcc needs this public because of lambda function used inside
+            template <typename SizesHolder = OffsetsHolder>
+            void performRowBubbleSort(const SizesHolder &segmentsSize);
 
-      // TODO: the same as  above
-      template< typename SizesHolder = OffsetsHolder >
-      void computeColumnSizes( const SizesHolder& segmentsSizes );
+            // TODO: the same as  above
+            template <typename SizesHolder = OffsetsHolder>
+            void computeColumnSizes(const SizesHolder &segmentsSizes);
 
-   protected:
+         protected:
+            static constexpr int getWarpSize() { return WarpSize; };
 
-      static constexpr int getWarpSize() { return WarpSize; };
+            static constexpr int getLogWarpSize() { return std::log2(WarpSize); };
 
-      static constexpr int getLogWarpSize() { return std::log2( WarpSize ); };
+            template <typename SizesHolder = OffsetsHolder>
+            void verifyRowPerm(const SizesHolder &segmentsSizes);
 
-      template< typename SizesHolder = OffsetsHolder >
-      void verifyRowPerm( const SizesHolder& segmentsSizes );
+            template <typename SizesHolder = OffsetsHolder>
+            void verifyRowLengths(const SizesHolder &segmentsSizes);
 
-      template< typename SizesHolder = OffsetsHolder >
-      void verifyRowLengths( const SizesHolder& segmentsSizes );
+            IndexType getStripLength(const IndexType stripIdx) const;
 
-      IndexType getStripLength( const IndexType stripIdx ) const;
+            IndexType getGroupLength(const IndexType strip, const IndexType group) const;
 
-      IndexType getGroupLength( const IndexType strip, const IndexType group ) const;
+            IndexType size = 0, storageSize = 0;
 
-      IndexType size = 0, storageSize = 0;
+            IndexType virtualRows = 0;
 
-      IndexType virtualRows = 0;
+            OffsetsHolder rowPermArray;
 
-      OffsetsHolder rowPermArray;
+            OffsetsHolder groupPointers;
 
-      OffsetsHolder groupPointers;
+            // TODO: Replace later
+            __cuda_callable__ Index power(const IndexType number, const IndexType exponent) const
+            {
+               if (exponent >= 0)
+               {
+                  IndexType result = 1;
+                  for (IndexType i = 0; i < exponent; i++)
+                     result *= number;
+                  return result;
+               }
+               return 0;
+            };
 
-      // TODO: Replace later
-      __cuda_callable__ Index power( const IndexType number, const IndexType exponent ) const
-      {
-          if( exponent >= 0 )
-          {
-              IndexType result = 1;
-              for( IndexType i = 0; i < exponent; i++ )
-                  result *= number;
-              return result;
-          }
-          return 0;
-      };
-
-      template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int WarpSize_ >
-      friend class BiEllpack;
-};
+            template <typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int WarpSize_>
+            friend class BiEllpack;
+         };
 
       } // namespace Segments
-   }  // namespace Algorithms
+   }    // namespace Algorithms
 } // namespace TNL
 
 #include <TNL/Algorithms/Segments/BiEllpack.hpp>
diff --git a/src/TNL/Algorithms/Segments/BiEllpack.hpp b/src/TNL/Algorithms/Segments/BiEllpack.hpp
index 2c44eb27a91ee006f330d14fec5d40cf7deee3fc..d0847b6a3db19e13858af4a668b951360d3b50df 100644
--- a/src/TNL/Algorithms/Segments/BiEllpack.hpp
+++ b/src/TNL/Algorithms/Segments/BiEllpack.hpp
@@ -131,7 +131,7 @@ performRowBubbleSort( const SizesHolder& segmentsSizes )
    if( segmentsSizes.getSize() == 0 )
       return;
 
-   this->rowPermArray.forEachElement( [] __cuda_callable__ ( const IndexType idx, IndexType& value ) { value = idx; } );
+   this->rowPermArray.forAllElements( [] __cuda_callable__ ( const IndexType idx, IndexType& value ) { value = idx; } );
 
    //if( std::is_same< DeviceType, Devices::Host >::value )
    {
@@ -443,12 +443,12 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int WarpSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
-   this->getConstView().forElements( first, last, f, args... );
+   this->getConstView().forElements( first, last, f );
 }
 
 template< typename Device,
@@ -456,14 +456,41 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int WarpSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
 }
 
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int WarpSize >
+   template< typename Function >
+void
+BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
+forSegments( IndexType begin, IndexType end, Function&& f ) const
+{
+   this->getConstView().forSegments( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int WarpSize >
+   template< typename Function >
+void
+BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
+forEachSegment( Function&& f ) const
+{
+   this->getConstView().forEachSegment( f );
+}
+
+
 template< typename Device,
           typename Index,
           typename IndexAllocator,
diff --git a/src/TNL/Algorithms/Segments/BiEllpackSegmentView.h b/src/TNL/Algorithms/Segments/BiEllpackSegmentView.h
index 063a8db6a004689a8452df87a16c3d4647794cce..e245978122b1708e5f52494fd92ec582494185f4 100644
--- a/src/TNL/Algorithms/Segments/BiEllpackSegmentView.h
+++ b/src/TNL/Algorithms/Segments/BiEllpackSegmentView.h
@@ -24,7 +24,7 @@ template< typename Index,
 class BiEllpackSegmentView
 {
    public:
-      
+
       static constexpr int getWarpSize() { return WarpSize; };
 
       static constexpr int getLogWarpSize() { static_assert( WarpSize == 32, "nvcc does not allow constexpr log2" ); return 5; }// TODO: return std::log2( WarpSize ); };
@@ -37,17 +37,18 @@ class BiEllpackSegmentView
 
       /**
        * \brief Constructor.
-       * 
+       *
        * \param offset is offset of the first group of the strip the segment belongs to.
        * \param size is the segment size
        * \param inStripIdx is index of the segment within its strip.
        * \param groupsWidth is a static vector containing widths of the strip groups
        */
       __cuda_callable__
-      BiEllpackSegmentView( const IndexType offset,
+      BiEllpackSegmentView( const IndexType segmentIdx,
+                            const IndexType offset,
                             const IndexType inStripIdx,
                             const GroupsWidthType& groupsWidth )
-      : groupOffset( offset ), inStripIdx( inStripIdx ), segmentSize( TNL::sum( groupsWidth ) ), groupsWidth( groupsWidth ){};
+      : segmentIdx( segmentIdx ), groupOffset( offset ), inStripIdx( inStripIdx ), segmentSize( TNL::sum( groupsWidth ) ), groupsWidth( groupsWidth ){};
 
       __cuda_callable__
       IndexType getSize() const
@@ -79,9 +80,15 @@ class BiEllpackSegmentView
             return offset + inStripIdx + localIdx * groupHeight;
       };
 
+      __cuda_callable__
+      const IndexType& getSegmentIndex() const
+      {
+         return this->segmentIdx;
+      };
+
       protected:
 
-         IndexType groupOffset, inStripIdx, segmentSize;
+         IndexType segmentIdx, groupOffset, inStripIdx, segmentSize;
 
          GroupsWidthType groupsWidth;
 };
diff --git a/src/TNL/Algorithms/Segments/BiEllpackView.h b/src/TNL/Algorithms/Segments/BiEllpackView.h
index 860f4d213fb19e9058573fb3f07441bfcde03204..44629ea719e9882de532b6bdc8d66cd1ae9d2435 100644
--- a/src/TNL/Algorithms/Segments/BiEllpackView.h
+++ b/src/TNL/Algorithms/Segments/BiEllpackView.h
@@ -32,13 +32,13 @@ class BiEllpackView
 
       using DeviceType = Device;
       using IndexType = std::remove_const_t< Index >;
-      using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstOffsetsView = typename OffsetsView::ConstViewType;
       using ViewType = BiEllpackView;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = BiEllpackView< Device_, Index_ >;
-      using ConstViewType = BiEllpackView< Device, std::add_const_t< Index > >;
-      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >;
+      using ViewTemplate = BiEllpackView< Device_, Index_, Organization, WarpSize >;
+      using ConstViewType = BiEllpackView< Device, std::add_const_t< Index >, Organization, WarpSize >;
+      using SegmentViewType = BiEllpackSegmentView< IndexType, Organization, WarpSize >;
 
       static constexpr bool havePadding() { return true; };
 
@@ -111,12 +111,17 @@ class BiEllpackView
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType first, IndexType last, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/BiEllpackView.hpp b/src/TNL/Algorithms/Segments/BiEllpackView.hpp
index 7b1e2024c96748aabf005ac08b7f54b926579977..e861e8f76605ffc73679c2ef05c1a126686b23a1 100644
--- a/src/TNL/Algorithms/Segments/BiEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/BiEllpackView.hpp
@@ -129,7 +129,12 @@ template< typename Device,
 __cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( size, storageSize, virtualRows, rowPermArray.getConstView(), groupPointers.getConstView() );
+   BiEllpackView* this_ptr = const_cast< BiEllpackView* >( this );
+   return ConstViewType( size,
+                         storageSize,
+                         virtualRows,
+                         this_ptr->rowPermArray.getView(),
+                         this_ptr->groupPointers.getView() );
 }
 
 template< typename Device,
@@ -255,14 +260,14 @@ template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int WarpSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 BiEllpackView< Device, Index, Organization, WarpSize >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
    const auto segmentsPermutationView = this->rowPermArray.getConstView();
    const auto groupPointersView = this->groupPointers.getConstView();
-   auto work = [=] __cuda_callable__ ( IndexType segmentIdx, Args... args ) mutable {
+   auto work = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
       const IndexType strip = segmentIdx / getWarpSize();
       const IndexType firstGroupInStrip = strip * ( getLogWarpSize() + 1 );
       const IndexType rowStripPerm = segmentsPermutationView[ segmentIdx ] - strip * getWarpSize();
@@ -298,19 +303,48 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
          groupHeight /= 2;
       }
    };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last , work, args... );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last , work );
 }
 
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int WarpSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 BiEllpackView< Device, Index, Organization, WarpSize >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          int WarpSize >
+   template< typename Function >
+void
+BiEllpackView< Device, Index, Organization, WarpSize >::
+forSegments( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = this->getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
+      auto segment = view.getSegmentView( segmentIdx );
+      function( segment );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          int WarpSize >
+   template< typename Function >
+void
+BiEllpackView< Device, Index, Organization, WarpSize >::
+forEachSegment( Function&& f ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f );
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h
index 998ed4244ec1a18cc33fa985f3fc7de3a2957ce3..e63b4c8da49b15644ce7f2436d7c78de3a76ef71 100644
--- a/src/TNL/Algorithms/Segments/CSR.h
+++ b/src/TNL/Algorithms/Segments/CSR.h
@@ -104,15 +104,21 @@ class CSR
 
       /***
        * \brief Go over all segments and for each segment element call
-       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * function 'f'. The return type of 'f' is bool.
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
+
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/CSR.hpp b/src/TNL/Algorithms/Segments/CSR.hpp
index 3e729938e4de37ca823d76e40a1b2bc62a6e4f92..823393c2f2e1cb7bcbed638701157cca665df988 100644
--- a/src/TNL/Algorithms/Segments/CSR.hpp
+++ b/src/TNL/Algorithms/Segments/CSR.hpp
@@ -198,7 +198,7 @@ auto
 CSR< Device, Index, Kernel, IndexAllocator >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
+   return SegmentViewType( segmentIdx, offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
 }
 
 template< typename Device,
@@ -227,24 +227,48 @@ template< typename Device,
           typename Index,
           typename Kernel,
           typename IndexAllocator >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 CSR< Device, Index, Kernel, IndexAllocator >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType begin, IndexType end, Function&& f ) const
 {
-   this->getConstView().forElements( first, last, f, args... );
+   this->getConstView().forElements( begin, end, f );
 }
 
 template< typename Device,
           typename Index,
           typename Kernel,
-          typename IndexAllocator>
-   template< typename Function, typename... Args >
+          typename IndexAllocator >
+   template< typename Function >
+void
+CSR< Device, Index, Kernel, IndexAllocator >::
+forAllElements( Function&& f ) const
+{
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device,
+          typename Index,
+          typename Kernel,
+          typename IndexAllocator >
+   template< typename Function >
+void
+CSR< Device, Index, Kernel, IndexAllocator >::
+forSegments( IndexType begin, IndexType end, Function&& f ) const
+{
+   this->getConstView().forSegments( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          typename Kernel,
+          typename IndexAllocator >
+   template< typename Function >
 void
 CSR< Device, Index, Kernel, IndexAllocator >::
-forEachElement( Function& f, Args... args ) const
+forEachSegment( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->getConstView().forEachSegment( f );
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
index a9f921c73cea8c52b473837be21b6802ebc64f1a..979a545240586c097936859ba2ac7a99c6efba0f 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
@@ -54,7 +54,7 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
 
    __shared__ Real streamShared[ WarpsCount ][ StreamedSharedElementsPerWarp ];
    __shared__ Real multivectorShared[ CudaBlockSize / WarpSize ];
-   __shared__ BlockType sharedBlocks[ WarpsCount ];
+   //__shared__ BlockType sharedBlocks[ WarpsCount ];
 
    const Index index = ( ( gridIdx * TNL::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x ) + threadIdx.x;
    const Index blockIdx = index / WarpSize;
diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h
index 230063c7a31642a166404a533b0cc4b3919808df..cd9e44a2a323f1185363cdb46dadfb69a74d8566 100644
--- a/src/TNL/Algorithms/Segments/CSRView.h
+++ b/src/TNL/Algorithms/Segments/CSRView.h
@@ -101,16 +101,21 @@ class CSRView
 
       /***
        * \brief Go over all segments and for each segment element call
-       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * function 'f'. The return type of 'f' is bool.
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp
index 5d71a2a67071a5f514aadee2d7b28fe2aefab863..bb40dc9f612282b22a4b374ae2ab936a62588f9e 100644
--- a/src/TNL/Algorithms/Segments/CSRView.hpp
+++ b/src/TNL/Algorithms/Segments/CSRView.hpp
@@ -177,38 +177,65 @@ auto
 CSRView< Device, Index, Kernel >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
-   return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
+   return SegmentViewType( segmentIdx, offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
 template< typename Device,
           typename Index,
           typename Kernel >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 CSRView< Device, Index, Kernel >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType begin, IndexType end, Function&& f ) const
 {
    const auto offsetsView = this->offsets;
-   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
       const IndexType begin = offsetsView[ segmentIdx ];
       const IndexType end = offsetsView[ segmentIdx + 1 ];
       IndexType localIdx( 0 );
       bool compute( true );
       for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
-         f( segmentIdx, localIdx++, globalIdx, compute, args... );
+         f( segmentIdx, localIdx++, globalIdx, compute );
    };
-   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   Algorithms::ParallelFor< Device >::exec( begin, end, l );
 }
 
 template< typename Device,
           typename Index,
           typename Kernel >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 CSRView< Device, Index, Kernel >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device,
+          typename Index,
+          typename Kernel >
+   template< typename Function >
+void
+CSRView< Device, Index, Kernel >::
+forSegments( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = this->getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
+      auto segment = view.getSegmentView( segmentIdx );
+      function( segment );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          typename Kernel >
+   template< typename Function >
+void
+CSRView< Device, Index, Kernel >::
+forEachSegment( Function&& f ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f );
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpack.h b/src/TNL/Algorithms/Segments/ChunkedEllpack.h
index ac9c29f766961fbfb8555f0fce790121bfc9d144..b6bdd5bf16d13c94137dbb161e7eb13877e91a31 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpack.h
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpack.h
@@ -35,11 +35,11 @@ class ChunkedEllpack
       template< typename Device_, typename Index_ >
       using ViewTemplate = ChunkedEllpackView< Device_, Index_, Organization >;
       using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< IndexType >, Organization >;
-      using SegmentViewType = ChunkedEllpackSegmentView< IndexType, Organization >;
-      using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
+      using SegmentViewType = typename ViewType::SegmentViewType;
+      using ChunkedEllpackSliceInfoType = typename ViewType::ChunkedEllpackSliceInfoType; // details::ChunkedEllpackSliceInfo< IndexType >;
       //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >;
-      using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
-      using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
+      using ChunkedEllpackSliceInfoAllocator = typename ViewType::ChunkedEllpackSliceInfoAllocator; // typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
+      using ChunkedEllpackSliceInfoContainer = typename ViewType::ChunkedEllpackSliceInfoContainer; // Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
 
       static constexpr bool havePadding() { return true; };
 
@@ -96,12 +96,17 @@ class ChunkedEllpack
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType first, IndexType last, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp b/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
index d2ffee06c6114189b56b70cebe5cff7d44e9c9ca..69e6b4c67f648ffbe1146dc38463682fd3c4079d 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
@@ -391,24 +391,48 @@ template< typename Device,
           typename Index,
           typename IndexAllocator,
           ElementsOrganization Organization >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 ChunkedEllpack< Device, Index, IndexAllocator, Organization >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
-   this->getConstView().forElements( first, last, f, args... );
+   this->getConstView().forElements( first, last, f );
 }
 
 template< typename Device,
           typename Index,
           typename IndexAllocator,
           ElementsOrganization Organization >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 ChunkedEllpack< Device, Index, IndexAllocator, Organization >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+ChunkedEllpack< Device, Index, IndexAllocator, Organization >::
+forSegments( IndexType begin, IndexType end, Function&& f ) const
+{
+   this->getConstView().forSegments( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+ChunkedEllpack< Device, Index, IndexAllocator, Organization >::
+forEachSegment( Function&& f ) const
+{
+   this->getConstView().forEachSegment( f );
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h b/src/TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h
index 263aaa88d2e159ceaf7abdfe0c0055c6f9f8085d..1f30cbe81063451a46eac13ba7f8241e617aabf5 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h
@@ -26,15 +26,16 @@ class ChunkedEllpackSegmentView< Index, ColumnMajorOrder >
       using IndexType = Index;
 
       __cuda_callable__
-      ChunkedEllpackSegmentView( const IndexType offset,
+      ChunkedEllpackSegmentView( const IndexType segmentIdx,
+                                 const IndexType offset,
                                  const IndexType size,
                                  const IndexType chunkSize,      // this is only for compatibility with the following specialization
                                  const IndexType chunksInSlice ) // this one as well - both can be replaced when we could use constexprif in C++17
-      : segmentOffset( offset ), segmentSize( size ){};
+      : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ){};
 
       __cuda_callable__
       ChunkedEllpackSegmentView( const ChunkedEllpackSegmentView& view )
-      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ){};
+      : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ){};
 
       __cuda_callable__
       IndexType getSize() const
@@ -49,9 +50,15 @@ class ChunkedEllpackSegmentView< Index, ColumnMajorOrder >
          return segmentOffset + localIndex;
       };
 
+      __cuda_callable__
+      const IndexType& getSegmentIndex() const
+      {
+         return this->segmentIdx;
+      };
+
       protected:
-         
-         IndexType segmentOffset, segmentSize;
+
+         IndexType segmentIdx, segmentOffset, segmentSize;
 };
 
 template< typename Index >
@@ -62,13 +69,19 @@ class ChunkedEllpackSegmentView< Index, RowMajorOrder >
       using IndexType = Index;
 
       __cuda_callable__
-      ChunkedEllpackSegmentView( const IndexType offset,
+      ChunkedEllpackSegmentView( const IndexType segmentIdx,
+                                 const IndexType offset,
                                  const IndexType size,
                                  const IndexType chunkSize,
                                  const IndexType chunksInSlice )
-      : segmentOffset( offset ), segmentSize( size ),
+      : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ),
         chunkSize( chunkSize ), chunksInSlice( chunksInSlice ){};
 
+      __cuda_callable__
+      ChunkedEllpackSegmentView( const ChunkedEllpackSegmentView& view )
+      : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ),
+        chunkSize( view.chunkSize ), chunksInSlice( view.chunksInSlice ){};
+
       __cuda_callable__
       IndexType getSize() const
       {
@@ -84,9 +97,26 @@ class ChunkedEllpackSegmentView< Index, RowMajorOrder >
          return segmentOffset + inChunkOffset * chunksInSlice + chunkIdx;
       };
 
+      __cuda_callable__
+      const IndexType& getSegmentIndex() const
+      {
+         return this->segmentIdx;
+      };
+
+      // Copy assignment. Not const-qualified: it overwrites every data member of *this.
+      __cuda_callable__ ChunkedEllpackSegmentView& operator = ( const ChunkedEllpackSegmentView& view )
+      {
+         this->segmentIdx = view.segmentIdx;
+         this->segmentOffset = view.segmentOffset;
+         this->segmentSize = view.segmentSize;
+         this->chunkSize = view.chunkSize;
+         this->chunksInSlice = view.chunksInSlice;
+         return *this;
+      }
+
       protected:
-         
-         IndexType segmentOffset, segmentSize, chunkSize, chunksInSlice;
+
+         IndexType segmentIdx, segmentOffset, segmentSize, chunkSize, chunksInSlice;
 };
 
       } //namespace Segments
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpackView.h b/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
index 18f08544e3233c2d8e2dfb6f1ffcfc1b477957bf..196c0764e1109ea66e433443c068553eed495dc7 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
@@ -12,6 +12,7 @@
 
 #include <type_traits>
 
+#include <TNL/TypeTraits.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/Segments/ElementsOrganization.h>
 #include <TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h>
@@ -31,12 +32,12 @@ class ChunkedEllpackView
 
       using DeviceType = Device;
       using IndexType = std::remove_const_t< Index >;
-      using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >;
       using ConstOffsetsView = typename OffsetsView::ConstViewType;
       using ViewType = ChunkedEllpackView;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = ChunkedEllpackView< Device_, Index_ >;
-      using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index > >;
+      using ViewTemplate = ChunkedEllpackView< Device_, Index_, Organization >;
+      using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, Organization >;
       using SegmentViewType = ChunkedEllpackSegmentView< IndexType, Organization >;
       using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
       using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
@@ -124,12 +125,17 @@ class ChunkedEllpackView
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp b/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
index 163ac448e831d378399bf6b2fcf183561ca2d005..147b362d125bdad627bd58d728ab0a1573be9345 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
@@ -142,7 +142,7 @@ typename ChunkedEllpackView< Device, Index, Organization >::ViewType
 ChunkedEllpackView< Device, Index, Organization >::
 getView()
 {
-   return ViewType( size, chunksInSlice, desiredChunkSize,
+   return ViewType( size, storageSize, chunksInSlice, desiredChunkSize,
                     rowToChunkMapping.getView(),
                     rowToSliceMapping.getView(),
                     chunksToSegmentsMapping.getView(),
@@ -157,12 +157,13 @@ template< typename Device,
 __cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( size, chunksInSlice, desiredChunkSize,
-                         rowToChunkMapping.getConstView(),
-                         rowToSliceMapping.getConstView(),
-                         chunksToSegmentsMapping.getConstView(),
-                         rowPointers.getConstView(),
-                         slices.getConstView(),
+   ChunkedEllpackView* this_ptr = const_cast< ChunkedEllpackView* >( this ); // NOTE(review): drops constness so the members' non-const getView() can be called below; presumably ConstViewType's constructor expects those view types - confirm
+   return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize,
+                         this_ptr->rowToChunkMapping.getView(),
+                         this_ptr->rowToSliceMapping.getView(),
+                         this_ptr->chunksToSegmentsMapping.getView(),
+                         this_ptr->rowPointers.getView(),
+                         this_ptr->slices.getView(),
                          numberOfSlices );
 }
 
@@ -297,16 +298,16 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 template< typename Device,
           typename Index,
           ElementsOrganization Organization >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 ChunkedEllpackView< Device, Index, Organization >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
    const IndexType chunksInSlice = this->chunksInSlice;
    auto rowToChunkMapping = this->rowToChunkMapping;
    auto rowToSliceMapping = this->rowToSliceMapping;
    auto slices = this->slices;
-   auto work = [=] __cuda_callable__ ( IndexType segmentIdx, Args... args ) mutable {
+   auto work = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
       const IndexType sliceIdx = rowToSliceMapping[ segmentIdx ];
 
       IndexType firstChunkOfSegment( 0 );
@@ -328,7 +329,7 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
          IndexType end = begin + segmentSize;
          IndexType localIdx( 0 );
          for( IndexType j = begin; j < end && compute; j++ )
-            f( segmentIdx, localIdx++, j, compute, args...);
+            f( segmentIdx, localIdx++, j, compute );
       }
       else
       {
@@ -339,25 +340,55 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
             IndexType end = begin + chunksInSlice * chunkSize;
             for( IndexType j = begin; j < end && compute; j += chunksInSlice )
             {
-               f( segmentIdx, localIdx++, j, compute, args...);
+               f( segmentIdx, localIdx++, j, compute );
             }
          }
       }
    };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last , work, args... );
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, work );
 }
 
 template< typename Device,
           typename Index,
           ElementsOrganization Organization >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 ChunkedEllpackView< Device, Index, Organization >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
 }
 
+template< typename Device, // forSegments: calls `function` with the segment view of every segment index that ParallelFor visits
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+ChunkedEllpackView< Device, Index, Organization >::
+forSegments( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = this->getConstView(); // const view of these segments; the lambda below holds its own copy
+   using SVType = decltype( view.getSegmentView( IndexType() ) );
+   static_assert( std::is_same< SVType, SegmentViewType >::value, "" ); // compile-time guard: the const view must yield this class's SegmentViewType
+   auto f = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable { // [=] copies both `view` and `function` into the closure
+      auto segment = view.getSegmentView( segmentIdx );
+      function( segment );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f ); // begin/end are handed to ParallelFor unchanged
+}
+
+template< typename Device, // forEachSegment: forSegments with 0 and getSegmentsCount() as the bounds
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+ChunkedEllpackView< Device, Index, Organization >::
+forEachSegment( Function&& f ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f ); // f is passed on as an lvalue
+}
+
+
 template< typename Device,
           typename Index,
           ElementsOrganization Organization >
@@ -371,7 +402,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio
    {
       //segmentsReductionKernel( 0, first, last, fetch, reduction, keeper, zero, args... );
       //return;
-      
+
       for( IndexType segmentIdx = first; segmentIdx < last; segmentIdx++ )
       {
          const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
diff --git a/src/TNL/Algorithms/Segments/Ellpack.h b/src/TNL/Algorithms/Segments/Ellpack.h
index 1c14ced75fc396d4a7f93adea02df776ae4d13e2..c88ba6a1d01d4c6e49a6c2b6af2850bd14a8f3f0 100644
--- a/src/TNL/Algorithms/Segments/Ellpack.h
+++ b/src/TNL/Algorithms/Segments/Ellpack.h
@@ -96,12 +96,17 @@ class Ellpack
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType first, IndexType last, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/Ellpack.hpp b/src/TNL/Algorithms/Segments/Ellpack.hpp
index 3feda5dbc44fdb1027e71e52d7ff32b07767cca3..124e3dfc2cabb13d7533e8269c27e3ce85ca50df 100644
--- a/src/TNL/Algorithms/Segments/Ellpack.hpp
+++ b/src/TNL/Algorithms/Segments/Ellpack.hpp
@@ -109,7 +109,7 @@ auto
 Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
 getView() -> ViewType
 {
-   return ViewType( segmentSize, size, alignedSize );
+   return ViewType( size, segmentSize, alignedSize );
 }
 
 template< typename Device,
@@ -121,7 +121,7 @@ auto
 Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( segmentSize, size, alignedSize );
+   return ConstViewType( size, segmentSize, alignedSize );
 }
 
 template< typename Device,
@@ -242,9 +242,9 @@ __cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Ali
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    if( Organization == RowMajorOrder )
-      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+      return SegmentViewType( segmentIdx, segmentIdx * this->segmentSize, this->segmentSize, 1 );
    else
-      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
+      return SegmentViewType( segmentIdx, segmentIdx, this->segmentSize, this->alignedSize );
 }
 
 template< typename Device,
@@ -252,12 +252,12 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int Alignment >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
-   this->getConstView().forElements( first, last, f, args... );
+   this->getConstView().forElements( first, last, f );
 }
 
 template< typename Device,
@@ -265,12 +265,38 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int Alignment >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device, // forSegments: delegates to the const view's forSegments with the same bounds and functor
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int Alignment >
+   template< typename Function >
+void
+Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
+forSegments( IndexType begin, IndexType end, Function&& f ) const
+{
+   this->getConstView().forSegments( begin, end, f ); // the view is a temporary built just for this call
+}
+
+template< typename Device, // forEachSegment: delegates to the const view's forEachSegment
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int Alignment >
+   template< typename Function >
+void
+Ellpack< Device, Index, IndexAllocator, Organization, Alignment >::
+forEachSegment( Function&& f ) const
+{
+   this->getConstView().forEachSegment( f ); // the view is a temporary built just for this call
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/EllpackView.h b/src/TNL/Algorithms/Segments/EllpackView.h
index 4110e8c15d32484bd0475201386f07345a8a34c2..77d0d8b7b239262cc8a50ea947fd3e5fbf93a00a 100644
--- a/src/TNL/Algorithms/Segments/EllpackView.h
+++ b/src/TNL/Algorithms/Segments/EllpackView.h
@@ -47,7 +47,10 @@ class EllpackView
       EllpackView();
 
       __cuda_callable__
-      EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize );
+      EllpackView( IndexType segmentsCount, IndexType segmentSize, IndexType alignedSize );
+
+      __cuda_callable__
+      EllpackView( IndexType segmentsCount, IndexType segmentSize );
 
       __cuda_callable__
       EllpackView( const EllpackView& ellpackView );
@@ -92,12 +95,17 @@ class EllpackView
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
 
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
@@ -116,7 +124,7 @@ class EllpackView
 
    protected:
 
-      IndexType segmentSize, size, alignedSize;
+      IndexType segmentSize, segmentsCount, alignedSize;
 };
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/EllpackView.hpp b/src/TNL/Algorithms/Segments/EllpackView.hpp
index 7c657fd491eafdb8178ac55c104d7bf5a1edf38c..724774b539d9c2d431bb601a1efcb2639dd8d5f9 100644
--- a/src/TNL/Algorithms/Segments/EllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/EllpackView.hpp
@@ -27,7 +27,7 @@ template< typename Device,
 __cuda_callable__
 EllpackView< Device, Index, Organization, Alignment >::
 EllpackView()
-   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+   : segmentSize( 0 ), segmentsCount( 0 ), alignedSize( 0 )
 {
 }
 
@@ -37,11 +37,26 @@ template< typename Device,
           int Alignment >
 __cuda_callable__
 EllpackView< Device, Index, Organization, Alignment >::
-EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize )
-   : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize )
+EllpackView( IndexType segmentsCount, IndexType segmentSize, IndexType alignedSize )
+   : segmentSize( segmentSize ), segmentsCount( segmentsCount ), alignedSize( alignedSize )
 {
 }
 
+template< typename Device, // two-argument constructor: alignedSize is derived in the body instead of being passed in
+          typename Index,
+          ElementsOrganization Organization,
+          int Alignment >
+__cuda_callable__
+EllpackView< Device, Index, Organization, Alignment >::
+EllpackView( IndexType segmentsCount, IndexType segmentSize )
+   : segmentSize( segmentSize ), segmentsCount( segmentsCount )
+{
+   if( Organization == RowMajorOrder )
+      this->alignedSize = this->segmentsCount; // row-major: the aligned size is the segment count itself
+   else
+      this->alignedSize = roundUpDivision( segmentsCount, this->getAlignment() ) * this->getAlignment(); // otherwise a multiple of getAlignment(); roundUpDivision presumably rounds the quotient up - confirm
+}
+
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
@@ -49,7 +64,7 @@ template< typename Device,
 __cuda_callable__
 EllpackView< Device, Index, Organization, Alignment >::
 EllpackView( const EllpackView& ellpack )
-   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+   : segmentSize( ellpack.segmentSize ), segmentsCount( ellpack.segmentsCount ), alignedSize( ellpack.alignedSize )
 {
 }
 
@@ -60,7 +75,7 @@ template< typename Device,
 __cuda_callable__
 EllpackView< Device, Index, Organization, Alignment >::
 EllpackView( const EllpackView&& ellpack )
-   : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize )
+   : segmentSize( ellpack.segmentSize ), segmentsCount( ellpack.segmentsCount ), alignedSize( ellpack.alignedSize )
 {
 }
 
@@ -95,7 +110,7 @@ typename EllpackView< Device, Index, Organization, Alignment >::ViewType
 EllpackView< Device, Index, Organization, Alignment >::
 getView()
 {
-   return ViewType( segmentSize, size, alignedSize );
+   return ViewType( segmentsCount, segmentSize, alignedSize ); // EllpackView's constructor takes ( segmentsCount, segmentSize, alignedSize )
 }
 
 template< typename Device,
@@ -107,7 +122,7 @@ auto
 EllpackView< Device, Index, Organization, Alignment >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( segmentSize, size, alignedSize );
+   return ConstViewType( segmentsCount, segmentSize, alignedSize );
 }
 
 template< typename Device,
@@ -117,7 +132,7 @@ template< typename Device,
 __cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >::
 getSegmentsCount() const -> IndexType
 {
-   return this->size;
+   return this->segmentsCount;
 }
 
 template< typename Device,
@@ -137,7 +152,7 @@ template< typename Device,
 __cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >::
 getSize() const -> IndexType
 {
-   return this->size * this->segmentSize;
+   return this->segmentsCount * this->segmentSize;
 }
 
 
@@ -172,23 +187,23 @@ __cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    if( Organization == RowMajorOrder )
-      return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 );
+      return SegmentViewType( segmentIdx, segmentIdx * this->segmentSize, this->segmentSize, 1 );
    else
-      return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize );
+      return SegmentViewType( segmentIdx, segmentIdx, this->segmentSize, this->alignedSize );
 }
 
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int Alignment >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void EllpackView< Device, Index, Organization, Alignment >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
    if( Organization == RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
          const IndexType begin = segmentIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          IndexType localIdx( 0 );
@@ -196,21 +211,21 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
             f( segmentIdx, localIdx++, globalIdx, compute );
       };
-      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+      Algorithms::ParallelFor< Device >::exec( first, last, l );
    }
    else
    {
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
          const IndexType begin = segmentIdx;
          const IndexType end = storageSize;
          IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += alignedSize )
-            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+            f( segmentIdx, localIdx++, globalIdx, compute );
       };
-      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+      Algorithms::ParallelFor< Device >::exec( first, last, l );
    }
 }
 
@@ -218,11 +233,38 @@ template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int Alignment >
-   template< typename Function, typename... Args >
+   template< typename Function >
+void EllpackView< Device, Index, Organization, Alignment >::
+forAllElements( Function&& f ) const
+{
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device, // forSegments: calls `function` with the segment view of every segment index that ParallelFor visits
+          typename Index,
+          ElementsOrganization Organization,
+          int Alignment >
+   template< typename Function >
+void EllpackView< Device, Index, Organization, Alignment >::
+forSegments( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = this->getConstView(); // const view of these segments; the lambda below holds its own copy
+   auto f = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable { // [=] copies both `view` and `function` into the closure
+      auto segment = view.getSegmentView( segmentIdx );
+      function( segment );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f ); // begin/end are handed to ParallelFor unchanged
+}
+
+template< typename Device, // forEachSegment: forSegments with 0 and getSegmentsCount() as the bounds
+          typename Index,
+          ElementsOrganization Organization,
+          int Alignment >
+   template< typename Function >
 void EllpackView< Device, Index, Organization, Alignment >::
-forEachElement( Function& f, Args... args ) const
+forEachSegment( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f ); // f is passed on as an lvalue
 }
 
 template< typename Device,
@@ -288,7 +330,7 @@ EllpackView< Device, Index, Organization, Alignment >::
 operator=( const EllpackView< Device, Index, Organization, Alignment >& view )
 {
    this->segmentSize = view.segmentSize;
-   this->size = view.size;
+   this->segmentsCount = view.segmentsCount;
    this->alignedSize = view.alignedSize;
    return *this;
 }
@@ -301,7 +343,7 @@ void EllpackView< Device, Index, Organization, Alignment >::
 save( File& file ) const
 {
    file.save( &segmentSize );
-   file.save( &size );
+   file.save( &segmentsCount );
    file.save( &alignedSize );
 }
 
@@ -313,7 +355,7 @@ void EllpackView< Device, Index, Organization, Alignment >::
 load( File& file )
 {
    file.load( &segmentSize );
-   file.load( &size );
+   file.load( &segmentsCount );
    file.load( &alignedSize );
 }
 
diff --git a/src/TNL/Algorithms/Segments/SegmentView.h b/src/TNL/Algorithms/Segments/SegmentView.h
index a06017e6d44d3f5933d0ae258842418d89473742..ecf1c95f6e387bfac7e2623374d55f8dc1055415 100644
--- a/src/TNL/Algorithms/Segments/SegmentView.h
+++ b/src/TNL/Algorithms/Segments/SegmentView.h
@@ -28,17 +28,18 @@ class SegmentView< Index, ColumnMajorOrder >
       using IndexType = Index;
 
       __cuda_callable__
-      SegmentView( const IndexType offset,
+      SegmentView( const IndexType segmentIdx,
+                   const IndexType offset,
                    const IndexType size,
                    const IndexType step )
-      : segmentOffset( offset ), segmentSize( size ), step( step ){};
+      : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ), step( step ){};
 
       __cuda_callable__
       SegmentView( const SegmentView& view )
-      : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){};
+      : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){};
 
       __cuda_callable__
-      IndexType getSize() const
+      const IndexType& getSize() const
       {
          return this->segmentSize;
       };
@@ -50,9 +51,15 @@ class SegmentView< Index, ColumnMajorOrder >
          return segmentOffset + localIndex * step;
       };
 
+      __cuda_callable__
+      const IndexType& getSegmentIndex() const
+      {
+         return this->segmentIdx;
+      };
+
       protected:
-         
-         IndexType segmentOffset, segmentSize, step;
+
+         IndexType segmentIdx, segmentOffset, segmentSize, step;
 };
 
 template< typename Index >
@@ -63,13 +70,14 @@ class SegmentView< Index, RowMajorOrder >
       using IndexType = Index;
 
       __cuda_callable__
-      SegmentView( const IndexType offset,
+      SegmentView( const IndexType segmentIdx,
+                   const IndexType offset,
                    const IndexType size,
                    const IndexType step = 1 ) // For compatibility with previous specialization
-      : segmentOffset( offset ), segmentSize( size ){};
+      : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ){};
 
       __cuda_callable__
-      IndexType getSize() const
+      const IndexType& getSize() const
       {
          return this->segmentSize;
       };
@@ -81,9 +89,15 @@ class SegmentView< Index, RowMajorOrder >
          return segmentOffset + localIndex;
       };
 
+      __cuda_callable__
+      const IndexType& getSegmentIndex() const
+      {
+         return this->segmentIdx;
+      };
+
       protected:
-         
-         IndexType segmentOffset, segmentSize;
+
+         IndexType segmentIdx, segmentOffset, segmentSize;
 };
 
       } //namespace Segments
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpack.h b/src/TNL/Algorithms/Segments/SlicedEllpack.h
index 9b386c139f7bbfb77298ce633ae66687911265ea..942306c7516259e33a7ad6fcf7ebe5c83a376e44 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpack.h
+++ b/src/TNL/Algorithms/Segments/SlicedEllpack.h
@@ -93,12 +93,17 @@ class SlicedEllpack
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType first, IndexType last, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpack.hpp b/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
index 7a0bf838f21446c1c728d15283a7887283b4d4c1..82e7a85711a192024b320312066200ea825c6662 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
@@ -275,9 +275,9 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
    const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 
    if( Organization == RowMajorOrder )
-      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
+      return SegmentViewType( segmentIdx, sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
    else
-      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+      return SegmentViewType( segmentIdx, sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
 }
 
 template< typename Device,
@@ -285,12 +285,12 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int SliceSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
-   this->getConstView().forElements( first, last, f, args... );
+   this->getConstView().forElements( first, last, f );
 }
 
 template< typename Device,
@@ -298,12 +298,38 @@ template< typename Device,
           typename IndexAllocator,
           ElementsOrganization Organization,
           int SliceSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::
-forEachElement( Function& f, Args... args ) const
+forAllElements( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device, // forSegments: delegates to the const view's forSegments with the same bounds and functor
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int SliceSize >
+   template< typename Function >
+void
+SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::
+forSegments( IndexType begin, IndexType end, Function&& f ) const
+{
+   this->getConstView().forSegments( begin, end, f ); // the view is a temporary built just for this call
+}
+
+template< typename Device, // forEachSegment: delegates to the const view's forEachSegment
+          typename Index,
+          typename IndexAllocator,
+          ElementsOrganization Organization,
+          int SliceSize >
+   template< typename Function >
+void
+SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::
+forEachSegment( Function&& f ) const
+{
+   this->getConstView().forEachSegment( f ); // the view is a temporary built just for this call
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpackView.h b/src/TNL/Algorithms/Segments/SlicedEllpackView.h
index e05e2df87e6ea01eade1027e94d0f2f74b9a0390..2955ee3515960b6861bcd238d2134302dc28357e 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpackView.h
+++ b/src/TNL/Algorithms/Segments/SlicedEllpackView.h
@@ -36,7 +36,7 @@ class SlicedEllpackView
       template< typename Device_, typename Index_ >
       using ViewTemplate = SlicedEllpackView< Device_, Index_, Organization, SliceSize >;
       using ViewType = SlicedEllpackView;
-      using ConstViewType = ViewType;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, Organization, SliceSize >;
       using SegmentViewType = SegmentView< IndexType, Organization >;
 
       static constexpr bool havePadding() { return true; };
@@ -94,12 +94,17 @@ class SlicedEllpackView
        * When its true, the for-loop continues. Once 'f' returns false, the for-loop
        * is terminated.
        */
-      template< typename Function, typename... Args >
-      void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
+      template< typename Function >
+      void forElements( IndexType first, IndexType last, Function&& f ) const;
 
-      template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      template< typename Function >
+      void forAllElements( Function&& f ) const;
 
+      template< typename Function >
+      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+
+      template< typename Function >
+      void forEachSegment( Function&& f ) const;
 
       /***
        * \brief Go over all segments and perform a reduction in each of them.
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp b/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
index 8ec4e237e37d8cfea3cf90471a32ffa04862e47f..42fdae7ea488ddd426d6f8d084b924066f9aefd2 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
@@ -217,25 +217,25 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
    const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];
 
    if( Organization == RowMajorOrder )
-      return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
+      return SegmentViewType( segmentIdx, sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
    else
-      return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
+      return SegmentViewType( segmentIdx, sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
 }
 
 template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int SliceSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
 void
 SlicedEllpackView< Device, Index, Organization, SliceSize >::
-forElements( IndexType first, IndexType last, Function& f, Args... args ) const
+forElements( IndexType first, IndexType last, Function&& f ) const
 {
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( Organization == RowMajorOrder )
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
@@ -246,19 +246,19 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
          {
             // The following is a workaround of a bug in nvcc 11.2
-#if CUDART_VERSION == 11020            
-             f( segmentIdx, localIdx, globalIdx, compute, args... );
+#if CUDART_VERSION == 11020
+             f( segmentIdx, localIdx, globalIdx, compute );
              localIdx++;
 #else
-             f( segmentIdx, localIdx++, globalIdx, compute, args... );
+             f( segmentIdx, localIdx++, globalIdx, compute );
 #endif
          }
       };
-      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+      Algorithms::ParallelFor< Device >::exec( first, last, l );
    }
    else
    {
-      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
          const IndexType sliceIdx = segmentIdx / SliceSize;
          const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
          //const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
@@ -269,15 +269,15 @@ forElements( IndexType first, IndexType last, Function& f, Args... args ) const
          for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize )
          {
             // The following is a workaround of a bug in nvcc 11.2
-#if CUDART_VERSION == 11020            
-            f( segmentIdx, localIdx, globalIdx, compute, args... );
+#if CUDART_VERSION == 11020
+            f( segmentIdx, localIdx, globalIdx, compute );
             localIdx++;
 #else
-            f( segmentIdx, localIdx++, globalIdx, compute, args... );
+            f( segmentIdx, localIdx++, globalIdx, compute );
 #endif
          }
       };
-      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+      Algorithms::ParallelFor< Device >::exec( first, last, l );
    }
 }
 
@@ -285,12 +285,41 @@ template< typename Device,
           typename Index,
           ElementsOrganization Organization,
           int SliceSize >
-   template< typename Function, typename... Args >
+   template< typename Function >
+void
+SlicedEllpackView< Device, Index, Organization, SliceSize >::
+forAllElements( Function&& f ) const
+{
+   this->forElements( 0, this->getSegmentsCount(), f );
+}
+
+template< typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          int SliceSize >
+   template< typename Function >
+void
+SlicedEllpackView< Device, Index, Organization, SliceSize >::
+forSegments( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = this->getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType segmentIdx ) mutable {
+      auto segment = view.getSegmentView( segmentIdx );
+      function( segment );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          int SliceSize >
+   template< typename Function >
 void
 SlicedEllpackView< Device, Index, Organization, SliceSize >::
-forEachElement( Function& f, Args... args ) const
+forEachSegment( Function&& f ) const
 {
-   this->forElements( 0, this->getSegmentsCount(), f, args... );
+   this->forSegments( 0, this->getSegmentsCount(), f );
 }
 
 template< typename Device,
diff --git a/src/TNL/Algorithms/Segments/details/BiEllpack.h b/src/TNL/Algorithms/Segments/details/BiEllpack.h
index d4dfcd5217c38288f867d0478804c4cda13bc054..29551eb1deb9314a13aa50749174b18093401140 100644
--- a/src/TNL/Algorithms/Segments/details/BiEllpack.h
+++ b/src/TNL/Algorithms/Segments/details/BiEllpack.h
@@ -36,7 +36,7 @@ class BiEllpack
       using ConstOffsetsHolderView = typename OffsetsHolderView::ConstViewType;
       using SegmentsSizes = OffsetsHolder;
       using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >;
-      
+
       static constexpr int getWarpSize() { return WarpSize; };
 
       static constexpr int getLogWarpSize() { return std::log2( WarpSize ); };
@@ -90,7 +90,6 @@ class BiEllpack
          return groupPointers[ groupOffset + 1 ] - groupPointers[ groupOffset ];
       }
 
-      
       static IndexType getGroupSize( const ConstOffsetsHolderView& groupPointers,
                                      const IndexType strip,
                                      const IndexType group )
@@ -237,7 +236,8 @@ class BiEllpack
             groupHeight /= 2;
             //std::cerr << " ROW INIT: groupIdx = " << i << " groupSize = " << groupSize << " groupWidth = " << groupsWidth[ i ] << std::endl;
          }
-         return SegmentViewType( groupPointers[ groupIdx ],
+         return SegmentViewType( segmentIdx,
+                                 groupPointers[ groupIdx ],
                                  inStripIdx,
                                  groupsWidth );
       }
@@ -261,7 +261,8 @@ class BiEllpack
             groupsWidth[ i ] = groupSize / groupHeight;
             groupHeight /= 2;
          }
-         return SegmentViewType( groupPointers[ groupIdx ],
+         return SegmentViewType( segmentIdx,
+                                 groupPointers[ groupIdx ],
                                  inStripIdx,
                                  groupsWidth );
       }
diff --git a/src/TNL/Algorithms/Segments/details/CSR.h b/src/TNL/Algorithms/Segments/details/CSR.h
index 2e2a934cb043b3b38413f16d09e6bbb3745a9ec9..b9392815db770e502f29dcc4ea9a6f07a8b269eb 100644
--- a/src/TNL/Algorithms/Segments/details/CSR.h
+++ b/src/TNL/Algorithms/Segments/details/CSR.h
@@ -97,7 +97,7 @@ class CSR
       void forElements( IndexType first, IndexType last, Function& f, Args... args ) const;
 
       template< typename Function, typename... Args >
-      void forEachElement( Function& f, Args... args ) const;
+      void forAllElements( Function& f, Args... args ) const;
 
 
       /***
diff --git a/src/TNL/Algorithms/Segments/details/ChunkedEllpack.h b/src/TNL/Algorithms/Segments/details/ChunkedEllpack.h
index af9cbc55ed1e1295b802690cb131416cd2b1de16..41e4ca4158d45ed3945360aaf55e35adacd65b72 100644
--- a/src/TNL/Algorithms/Segments/details/ChunkedEllpack.h
+++ b/src/TNL/Algorithms/Segments/details/ChunkedEllpack.h
@@ -52,10 +52,9 @@ struct ChunkedEllpackSliceInfo
    Index pointer;
 };
 
-         
 template< typename Index,
           typename Device,
-          ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization() >
+          ElementsOrganization Organization >
 class ChunkedEllpack
 {
    public:
@@ -170,8 +169,8 @@ class ChunkedEllpack
       SegmentViewType getSegmentViewDirect( const OffsetsHolderView& segmentsToSlicesMapping,
                                             const ChunkedEllpackSliceInfoContainerView& slices,
                                             const OffsetsHolderView& segmentsToChunksMapping,
-                                            const IndexType chunksInSlice,
-                                            const IndexType segmentIdx )
+                                            const IndexType& chunksInSlice,
+                                            const IndexType& segmentIdx )
       {
          const IndexType& sliceIndex = segmentsToSlicesMapping[ segmentIdx ];
          IndexType firstChunkOfSegment( 0 );
@@ -185,12 +184,14 @@ class ChunkedEllpack
          const IndexType segmentSize = segmentChunksCount * chunkSize;
 
          if( Organization == RowMajorOrder )
-            return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize,
+            return SegmentViewType( segmentIdx,
+                                    sliceOffset + firstChunkOfSegment * chunkSize,
                                     segmentSize,
                                     chunkSize,
                                     chunksInSlice );
          else
-            return SegmentViewType( sliceOffset + firstChunkOfSegment,
+            return SegmentViewType( segmentIdx,
+                                    sliceOffset + firstChunkOfSegment,
                                     segmentSize,
                                     chunkSize,
                                     chunksInSlice );
@@ -215,12 +216,14 @@ class ChunkedEllpack
          const IndexType segmentSize = segmentChunksCount * chunkSize;
 
          if( Organization == RowMajorOrder )
-            return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize,
+            return SegmentViewType( segmentIdx,
+                                    sliceOffset + firstChunkOfSegment * chunkSize,
                                     segmentSize,
                                     chunkSize,
                                     chunksInSlice );
          else
-            return SegmentViewType( sliceOffset + firstChunkOfSegment,
+            return SegmentViewType( segmentIdx,
+                                    sliceOffset + firstChunkOfSegment,
                                     segmentSize,
                                     chunkSize,
                                     chunksInSlice );
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index 9c02ac9d46c028d197f935693bfc8502cec8ad9b..53c9290cabefa0251a30a489fc28b1c465b1f7e1 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -678,7 +678,7 @@ class Array
        *
        */
       template< typename Function >
-      void forEachElement( Function&& f );
+      void forAllElements( Function&& f );
 
       /**
        * \brief Process the lambda function \e f for each array element for constant instances.
@@ -706,7 +706,7 @@ class Array
        *
        */
       template< typename Function >
-      void forEachElement( Function&& f ) const;
+      void forAllElements( Function&& f ) const;
 
        /**
         * \brief Computes reduction with array elements on interval [ \e begin, \e end).
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 402168d111c9ce3046eb32e9686d89054a7e9682..f313a4cf522ebc430e13e810fd354b08a1f0acf1 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -715,9 +715,9 @@ template< typename Value,
    template< typename Function >
 void
 Array< Value, Device, Index, Allocator >::
-forEachElement( Function&& f )
+forAllElements( Function&& f )
 {
-   this->getView().forEachElement( f );
+   this->getView().forAllElements( f );
 }
 
 template< typename Value,
@@ -727,10 +727,10 @@ template< typename Value,
    template< typename Function >
 void
 Array< Value, Device, Index, Allocator >::
-forEachElement( Function&& f ) const
+forAllElements( Function&& f ) const
 {
    const auto view = this->getConstView();
-   view.forEachElement( f );
+   view.forAllElements( f );
 }
 
 template< typename Value,
diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h
index 1d3ae60ded125bdf5b7cda87ddb0a000d24c6a33..4b38460375abe28012bd76e29fa929580a13e5bf 100644
--- a/src/TNL/Containers/ArrayView.h
+++ b/src/TNL/Containers/ArrayView.h
@@ -496,7 +496,7 @@ public:
     *
     */
    template< typename Function >
-   void forEachElement( Function&& f );
+   void forAllElements( Function&& f );
 
    /**
     * \brief Process the lambda function \e f for each array element for constant instances.
@@ -524,7 +524,7 @@ public:
     *
     */
    template< typename Function >
-   void forEachElement( Function&& f ) const;
+   void forAllElements( Function&& f ) const;
 
    /**
     * \brief Computes reduction with array view elements on interval [ \e begin, \e end).
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index 9143dea1accd0b89a74901e415675f6e08ae4b2c..eeb0b1b4b6a2cfe4bc198f349b579541f60357ac 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -351,7 +351,7 @@ template< typename Value,
           typename Index >
    template< typename Function >
 void ArrayView< Value, Device, Index >::
-forEachElement( Function&& f )
+forAllElements( Function&& f )
 {
    this->forElements( 0, this->getSize(), f );
 }
@@ -361,7 +361,7 @@ template< typename Value,
           typename Index >
    template< typename Function >
 void ArrayView< Value, Device, Index >::
-forEachElement( Function&& f ) const
+forAllElements( Function&& f ) const
 {
    this->forElements( 0, this->getSize(), f );
 }
diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h
index 77c25b3a257bd61c44016990216e18e84086a189..83ec6d0b0e0c828981a2bfcdf9e1f84cb473e219 100644
--- a/src/TNL/Containers/VectorView.h
+++ b/src/TNL/Containers/VectorView.h
@@ -46,7 +46,7 @@ public:
 
    /**
     * \brief Device where the vector is allocated.
-    * 
+    *
     * See \ref Devices::Host or \ref Devices::Cuda.
     */
    using DeviceType = Device;
diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h
index f0b49128d2948c77c279e50597781cd35f96913d..d5444fada535124363a7c1de185557632878218f 100644
--- a/src/TNL/Matrices/DenseMatrix.h
+++ b/src/TNL/Matrices/DenseMatrix.h
@@ -39,7 +39,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 {
    protected:
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesVectorType = typename BaseType::ValuesType;
       using ValuesViewType = typename ValuesVectorType::ViewType;
       using SegmentsType = Algorithms::Segments::Ellpack< Device, Index, typename Allocators::Default< Device >::template Allocator< Index >, Organization, 1 >;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
@@ -92,13 +92,33 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        *
        * See \ref DenseMatrixView.
        */
-      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
+      using ConstViewType = typename DenseMatrixView< Real, Device, Index, Organization >::ConstViewType;
 
       /**
        * \brief Type for accessing matrix rows.
        */
       using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
+      /**
+       * \brief Type of vector holding values of matrix elements.
+       */
+      using typename Matrix< Real, Device, Index, RealAllocator >::ValuesType;
+
+      /**
+       * \brief Type of constant vector holding values of matrix elements.
+       */
+      using typename Matrix< Real, Device, Index, RealAllocator >::ConstValuesType;
+
+      /**
+       * \brief Type of vector view holding values of matrix elements.
+       */
+      using typename Matrix< Real, Device, Index, RealAllocator >::ValuesView;
+
+      /**
+       * \brief Type of constant vector view holding values of matrix elements.
+       */
+      using typename Matrix< Real, Device, Index, RealAllocator >::ConstValuesView;
+
       /**
        * \brief Helper type for getting self type or its modifications.
        */
@@ -363,7 +383,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is row index of the element.
        * \param column is columns index of the element.
@@ -387,7 +407,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is row index of the element.
        * \param column is columns index of the element.
@@ -415,7 +435,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -433,122 +453,98 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                        const IndexType column ) const;
 
       /**
-       * \brief Method for performing general reduction on matrix rows.
+       * \brief Method for iteration over matrix elements in rows from interval [ \e begin, \e end) for constant instances.
        *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, const RealType& value, bool& compute )`.
+       *  The column index repeats twice only for compatibility with sparse matrices.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
        *
        * \param begin defines beginning of the range [begin,end) of rows to be processed.
        * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \param function is an instance of the lambda function to be called in each row.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
-       * \include DenseMatrixExample_rowsReduction.out
+       * \include DenseMatrixExample_forRows.out
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero );
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
-       * \brief Method for performing general reduction on matrix rows for constant instances.
+       * \brief Method for iteration over matrix elements in rows from interval [ \e begin, \e end) for non-constant instances.
        *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * \tparam Function is type of lambda function that will operate on matrix elements.
+       *    It should have a form like
+       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value, bool& compute )`.
+       *  The column index repeats twice only for compatibility with sparse matrices.
+       *  If the 'compute' variable is set to false the iteration over the row can
+       *  be interrupted.
        *
        * \param begin defines beginning of the range [begin,end) of rows to be processed.
        * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \param function is an instance of the lambda function to be called in each row.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
-       * \include DenseMatrixExample_rowsReduction.out
+       * \include DenseMatrixExample_forRows.out
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const;
+      template< typename Function >
+      void forElements( IndexType begin, IndexType end, Function&& function );
 
       /**
-       * \brief Method for performing general reduction on ALL matrix rows.
+       * \brief This method calls \e forElements for all matrix rows (for constant instances).
        *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * See \ref DenseMatrix::forElements.
        *
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function is an instance of the lambda function to be called in each row.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
        * \par Output
-       * \include DenseMatrixExample_allRowsReduction.out
+       * \include DenseMatrixExample_forAllRows.out
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      template< typename Function >
+      void forAllElements( Function&& function ) const;
 
       /**
-       * \brief Method for performing general reduction on ALL matrix rows for constant instances.
+       * \brief This method calls \e forElements for all matrix rows.
        *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * See \ref DenseMatrix::forElements.
        *
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \tparam Function is a type of lambda function that will operate on matrix elements.
+       * \param function is an instance of the lambda function to be called in each row.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
        * \par Output
-       * \include DenseMatrixExample_allRowsReduction.out
+       * \include DenseMatrixExample_forAllRows.out
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      template< typename Function >
+      void forAllElements( Function&& function );
 
       /**
-       * \brief Method for iteration over all matrix rows for constant instances.
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
        *
-       * \tparam Function is type of lambda function that will operate on matrix elements.
-       *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, const RealType& value, bool& compute )`.
-       *  The column index repeats twice only for compatibility with sparse matrices.
-       *  If the 'compute' variable is set to false the iteration over the row can
-       *  be interrupted.
+       * In each row, the given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrix::forElements where more than one thread can be mapped to each row.
        *
-       * \param begin defines beginning of the range [begin,end) of rows to be processed.
-       * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param function is an instance of the lambda function to be called in each row.
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) mutable { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrix::RowViewType.
        *
        * \par Example
        * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
@@ -556,21 +552,25 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function ) const;
+      void forRows( IndexType begin, IndexType end, Function&& function );
 
       /**
-       * \brief Method for iteration over all matrix rows for non-constant instances.
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
        *
-       * \tparam Function is type of lambda function that will operate on matrix elements.
-       *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value, bool& compute )`.
-       *  The column index repeats twice only for compatibility with sparse matrices.
-       *  If the 'compute' variable is set to false the iteration over the row can
-       *  be interrupted.
+       * In each row, the given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrix::forElements where more than one thread can be mapped to each row.
        *
-       * \param begin defines beginning of the range [begin,end) of rows to be processed.
-       * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param function is an instance of the lambda function to be called in each row.
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrix::RowViewType.
        *
        * \par Example
        * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
@@ -578,39 +578,55 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function );
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
-       * \brief This method calls \e forElements for all matrix rows (for constant instances).
+       * \brief Method for parallel iteration over all matrix rows.
        *
-       * See \ref DenseMatrix::forElements.
+       * In each row, the given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrix::forAllElements where more than one thread can be mapped to each row.
        *
-       * \tparam Function is a type of lambda function that will operate on matrix elements.
-       * \param function  is an instance of the lambda function to be called in each row.
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) mutable { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrix::RowViewType.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
-       * \include DenseMatrixExample_forAllRows.out
+       * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllRows( Function&& function );
 
       /**
-       * \brief This method calls \e forElements for all matrix rows.
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
        *
-       * See \ref DenseMatrix::forEachElement.
+       * In each row, the given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrix::forAllElements where more than one thread can be mapped to each row.
        *
-       * \tparam Function is a type of lambda function that will operate on matrix elements.
-       * \param function  is an instance of the lambda function to be called in each row.
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrix::RowViewType.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
        * \par Output
-       * \include DenseMatrixExample_forAllRows.out
+       * \include DenseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
@@ -627,7 +643,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param function is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForRows( IndexType begin, IndexType end, Function& function ) const;
+      void sequentialForRows( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for non-constant instances.
@@ -644,7 +660,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param function is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForRows( IndexType begin, IndexType end, Function& function );
+      void sequentialForRows( IndexType begin, IndexType end, Function&& function );
 
       /**
        * \brief This method calls \e sequentialForRows for all matrix rows (for constant instances).
@@ -655,7 +671,7 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param function  is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForAllRows( Function& function ) const;
+      void sequentialForAllRows( Function&& function ) const;
 
       /**
        * \brief This method calls \e sequentialForRows for all matrix rows.
@@ -666,7 +682,111 @@ class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param function  is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForAllRows( Function& function );
+      void sequentialForAllRows( Function&& function );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non-void type.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is the type returned by the Fetch lambda function.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_reduceRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero );
+
+      /**
+       * \brief Method for performing general reduction on matrix rows for constant instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non-void type.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is the type returned by the Fetch lambda function.
+       *
+       * \param begin defines beginning of the range [begin,end) of rows to be processed.
+       * \param end defines ending of the range [begin,end) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_reduceRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_reduceRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non-void type.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted \e FetchValue in the signatures above).
+       *
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_reduceAllRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows for constant instances.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non-void type.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchReal is the type returned by the Fetch lambda function (denoted \e FetchValue in the signatures above).
+       *
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_reduceAllRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_reduceAllRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Computes product of matrix and vector.
diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp
index 9e220ebac753042844af24233e45b00c39758d73..d7d4ad82f6ce88c43c59fce126aea5ae5a24cb16 100644
--- a/src/TNL/Matrices/DenseMatrix.hpp
+++ b/src/TNL/Matrices/DenseMatrix.hpp
@@ -105,9 +105,11 @@ auto
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
 getView() -> ViewType
 {
+   ValuesView values_view = this->getValues().getView();
+   // NOTE: storing the view in a named ValuesView first is important here - it avoids a const qualifier appearing in the type (for a reason not yet understood).
    return ViewType( this->getRows(),
                     this->getColumns(),
-                    this->getValues().getView() );
+                    values_view );
 }
 
 template< typename Real,
@@ -353,9 +355,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
 {
-   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+   this->view.reduceRows( begin, end, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -366,9 +368,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+   this->view.reduceRows( begin, end, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -379,9 +381,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -392,9 +394,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -405,7 +407,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-forElements( IndexType begin, IndexType end, Function& function ) const
+forElements( IndexType begin, IndexType end, Function&& function ) const
 {
    this->view.forElements( begin, end, function );
 }
@@ -418,7 +420,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-forElements( IndexType first, IndexType last, Function& function )
+forElements( IndexType first, IndexType last, Function&& function )
 {
    this->view.forElements( first, last, function );
 }
@@ -431,7 +433,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-forEachElement( Function& function ) const
+forAllElements( Function&& function ) const
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -444,7 +446,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-forEachElement( Function& function )
+forAllElements( Function&& function )
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -457,7 +459,59 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-sequentialForRows( IndexType begin, IndexType end, Function& function ) const
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   this->getView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   this->getConstView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+forAllRows( Function&& function )
+{
+   this->getView().forAllRows( function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+forAllRows( Function&& function ) const
+{
+   this->getConstView().forAllRows( function );  // iterate over the rows through the constant view, as forRows() const does
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
+sequentialForRows( IndexType begin, IndexType end, Function&& function ) const
 {
    this->view.sequentialForRows( begin, end, function );
 }
@@ -470,7 +524,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-sequentialForRows( IndexType first, IndexType last, Function& function )
+sequentialForRows( IndexType first, IndexType last, Function&& function )
 {
    this->view.sequentialForRows( first, last, function );
 }
@@ -483,7 +537,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-sequentialForAllRows( Function& function ) const
+sequentialForAllRows( Function&& function ) const
 {
    this->sequentialForRows( 0, this->getRows(), function );
 }
@@ -496,7 +550,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrix< Real, Device, Index, Organization, RealAllocator >::
-sequentialForAllRows( Function& function )
+sequentialForAllRows( Function&& function )
 {
    this->sequentialForRows( 0, this->getRows(), function );
 }
@@ -1048,7 +1102,7 @@ operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHS
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
          this_view( rowIdx, columnIdx ) = value;
       };
-      matrix.forEachElement( f );
+      matrix.forAllElements( f );
    }
    else
    {
@@ -1124,7 +1178,7 @@ operator=( const RHSMatrix& matrix )
          if( value != 0.0 && columnIdx != padding_index )
             values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value;
       };
-      matrix.forEachElement( f );
+      matrix.forAllElements( f );
    }
    else
    {
diff --git a/src/TNL/Matrices/DenseMatrixElement.h b/src/TNL/Matrices/DenseMatrixElement.h
new file mode 100644
index 0000000000000000000000000000000000000000..e35235fd9ed6cbb8a752dec1d024d26cda938790
--- /dev/null
+++ b/src/TNL/Matrices/DenseMatrixElement.h
@@ -0,0 +1,63 @@
+/***************************************************************************
+                          DenseMatrixElement.h -  description
+                             -------------------
+    begin                : Mar 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+
+namespace TNL {
+namespace Matrices {
+
+/** \brief Accessor to one dense matrix element: a reference to its value together with its row and column indexes. */
+template< typename Real,
+          typename Index >
+class DenseMatrixElement
+{
+   public:
+      /** \brief Type of the matrix element value. */
+      using RealType = Real;
+      /** \brief Type of the row, column and local indexes. */
+      using IndexType = Index;
+      /** \brief Binds the element to \e value and records its row and column indexes. */
+      __cuda_callable__
+      DenseMatrixElement( RealType& value,
+                          const IndexType& rowIdx,
+                          const IndexType& columnIdx,
+                          const IndexType& localIdx )  // localIdx is here only for compatibility with SparseMatrixElement
+      : value_( value ), rowIdx( rowIdx ), columnIdx( columnIdx ) {};
+      /** \brief Returns a non-constant reference to the element value. */
+      __cuda_callable__
+      RealType& value() { return value_; };
+      /** \brief Returns a constant reference to the element value. */
+      __cuda_callable__
+      const RealType& value() const { return value_; };
+      /** \brief Returns the row index of the element. */
+      __cuda_callable__
+      const IndexType& rowIndex() const { return rowIdx; };
+      /** \brief Returns the column index of the element. */
+      __cuda_callable__
+      const IndexType& columnIndex() const { return columnIdx; };
+      /** \brief Returns the stored column index; the constructor's \e localIdx argument is not stored. */
+      __cuda_callable__
+      const IndexType& localIndex() const { return columnIdx; };
+
+   protected:
+      // Reference to the matrix element value this object gives access to.
+      RealType& value_;
+      // Stored by value (like columnIdx) so the element never refers to a temporary index argument.
+      const IndexType rowIdx;
+
+      const IndexType columnIdx;
+};
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h
index 49774949b0d31ac2d7a50c8a795d12c21a8d178d..e280b20b4e6ff8aa7780f738ec5e897072297749 100644
--- a/src/TNL/Matrices/DenseMatrixRowView.h
+++ b/src/TNL/Matrices/DenseMatrixRowView.h
@@ -10,6 +10,10 @@
 
 #pragma once
 
+#include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Matrices/MatrixRowViewIterator.h>
+#include <TNL/Matrices/DenseMatrixElement.h>
+
 namespace TNL {
    namespace Matrices {
 
@@ -57,6 +61,31 @@ class DenseMatrixRowView
        */
       using ValuesViewType = ValuesView;
 
+      /**
+       * \brief Type of constant container view used for storing the matrix elements values.
+       */
+      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+
+      /**
+       * \brief Type of dense matrix row view.
+       */
+      using RowView = DenseMatrixRowView< SegmentView, ValuesViewType >;
+
+      /**
+       * \brief Type of constant dense matrix row view.
+       */
+      using ConstView = DenseMatrixRowView< SegmentView, ConstValuesViewType >;
+
+      /**
+       * \brief The type of related matrix element.
+       */
+      using MatrixElementType = DenseMatrixElement< RealType, IndexType >;
+
+      /**
+       * \brief Type of iterator for the matrix row.
+       */
+      using IteratorType = MatrixRowViewIterator< RowView >;
+
       /**
        * \brief Constructor with \e segmentView and \e values
        *
@@ -75,6 +104,14 @@ class DenseMatrixRowView
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Returns the matrix row index.
+       *
+       * \return matrix row index.
+       */
+      __cuda_callable__
+      const IndexType& getRowIndex() const;
+
       /**
        * \brief Returns constants reference to an element with given column index.
        *
@@ -83,7 +120,7 @@ class DenseMatrixRowView
        * \return constant reference to the matrix element.
        */
       __cuda_callable__
-      const RealType& getElement( const IndexType column ) const;
+      const RealType& getValue( const IndexType column ) const;
 
       /**
        * \brief Returns non-constants reference to an element with given column index.
@@ -93,7 +130,17 @@ class DenseMatrixRowView
        * \return non-constant reference to the matrix element.
        */
       __cuda_callable__
-      RealType& getElement( const IndexType column );
+      RealType& getValue( const IndexType column );
+
+      /**
+       * \brief This method is only for compatibility with sparse matrix row.
+       *
+       * \param localIdx is the rank of the matrix element in given row.
+       *
+       * \return the value of \e localIdx, which serves as the column index in a dense matrix row.
+       */
+      __cuda_callable__
+      IndexType getColumnIndex( const IndexType localIdx ) const;
 
       /**
        * \brief Sets value of matrix element with given column index
@@ -102,8 +149,8 @@ class DenseMatrixRowView
        * \param value is a value the matrix element will be set to.
        */
       __cuda_callable__
-      void setElement( const IndexType column,
-                       const RealType& value );
+      void setValue( const IndexType column,
+                     const RealType& value );
 
       /**
        * \brief Sets value of matrix element with given column index
@@ -118,6 +165,39 @@ class DenseMatrixRowView
       void setElement( const IndexType localIdx,
                        const IndexType column,
                        const RealType& value );
+
+      /**
+       * \brief Returns iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      IteratorType begin();
+
+      /**
+       * \brief Returns iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      IteratorType end();
+
+      /**
+       * \brief Returns constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType cbegin() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType cend() const;
+
    protected:
 
       SegmentViewType segmentView;
diff --git a/src/TNL/Matrices/DenseMatrixRowView.hpp b/src/TNL/Matrices/DenseMatrixRowView.hpp
index 1c7af4adf97ee607163c83468ce0f067d7f46018..6c322cf71398cf7c77f707be7a2745ced7d216e0 100644
--- a/src/TNL/Matrices/DenseMatrixRowView.hpp
+++ b/src/TNL/Matrices/DenseMatrixRowView.hpp
@@ -38,7 +38,16 @@ template< typename SegmentView,
           typename ValuesView >
 __cuda_callable__ auto
 DenseMatrixRowView< SegmentView, ValuesView >::
-getElement( const IndexType column ) const -> const RealType&
+getRowIndex() const -> const IndexType&
+{
+   return segmentView.getSegmentIndex();
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getValue( const IndexType column ) const -> const RealType&
 {
    TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
    return values[ segmentView.getGlobalIndex( column ) ];
@@ -48,18 +57,29 @@ template< typename SegmentView,
           typename ValuesView >
 __cuda_callable__ auto
 DenseMatrixRowView< SegmentView, ValuesView >::
-getElement( const IndexType column ) -> RealType&
+getValue( const IndexType column ) -> RealType&
 {
    TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
    return values[ segmentView.getGlobalIndex( column ) ];
 }
 
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+getColumnIndex( const IndexType localIdx ) const -> IndexType
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Column index exceeds matrix row size." );
+   return localIdx;
+}
+
+
 template< typename SegmentView,
           typename ValuesView >
 __cuda_callable__ void
 DenseMatrixRowView< SegmentView, ValuesView >::
-setElement( const IndexType column,
-            const RealType& value )
+setValue( const IndexType column,
+          const RealType& value )
 {
    TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." );
    const IndexType globalIdx = segmentView.getGlobalIndex( column );
@@ -79,5 +99,41 @@ setElement( const IndexType localIdx,
    values[ globalIdx ] = value;
 }
 
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+begin() -> IteratorType
+{
+   return IteratorType( *this, 0 );
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+end() -> IteratorType
+{
+   return IteratorType( *this, this->getSize() );
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+cbegin() const -> const IteratorType
+{
+   return IteratorType( *this, 0 );
+}
+
+template< typename SegmentView,
+          typename ValuesView >
+__cuda_callable__ auto
+DenseMatrixRowView< SegmentView, ValuesView >::
+cend() const -> const IteratorType
+{
+   return IteratorType( *this, this->getSize() );
+}
+
    } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h
index 53b8fb324d10808a79d45b5188698db3d92e1c91..89ace2d0697a086de8311a677c40ae97e1759ea3 100644
--- a/src/TNL/Matrices/DenseMatrixView.h
+++ b/src/TNL/Matrices/DenseMatrixView.h
@@ -43,7 +43,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
 {
    protected:
       using BaseType = Matrix< Real, Device, Index >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesType = typename BaseType::ValuesType;
       using SegmentsType = Algorithms::Segments::Ellpack< Device, Index, typename Allocators::Default< Device >::template Allocator< Index >, Organization, 1 >;
       using SegmentsViewType = typename SegmentsType::ViewType;
       using SegmentViewType = typename SegmentsType::SegmentViewType;
@@ -77,7 +77,14 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        *
        * Use this for embedding of the matrix elements values.
        */
-      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ValuesViewType = typename ValuesType::ViewType;
+
+      /**
+       * \brief Constant matrix elements container view type.
+       *
+       * Use this for embedding of the matrix elements values.
+       */
+      using ConstValuesViewType = typename ValuesType::ConstViewType;
 
       /**
        * \brief Matrix view type.
@@ -91,7 +98,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        *
        * See \ref DenseMatrixView.
        */
-      using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >;
+      using ConstViewType = DenseMatrixView< std::add_const_t< Real >, Device, Index, Organization >;
 
       /**
        * \brief Type for accessing matrix row.
@@ -125,13 +132,33 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \include Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
        * \par Output
        * \include DenseMatrixViewExample_constructor.out
-
        */
       __cuda_callable__
       DenseMatrixView( const IndexType rows,
                        const IndexType columns,
                        const ValuesViewType& values );
 
+      /**
+       * \brief Constructor with matrix dimensions and values given by a vector view with element type \e Real_.
+       *
+       * \tparam Real_ is the element type of the vector view \e values.
+       *
+       * \param rows number of matrix rows.
+       * \param columns number of matrix columns.
+       * \param values is vector view with matrix elements values.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_constructor.out
+       */
+       template< typename Real_ >
+      __cuda_callable__
+      DenseMatrixView( const IndexType rows,
+                       const IndexType columns,
+                       const Containers::VectorView< Real_, Device, Index >& values );
+
+
       /**
        * \brief Copy constructor.
        *
@@ -305,7 +332,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is row index of the element.
        * \param column is columns index of the element.
@@ -329,7 +356,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is row index of the element.
        * \param column is columns index of the element.
@@ -357,7 +384,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref DenseMatrix::getRow
-       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forEachElement.
+       * or \ref DenseMatrix::forElements and \ref DenseMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -394,12 +421,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include DenseMatrixViewExample_rowsReduction.out
+       * \include DenseMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on matrix rows for constant instances.
@@ -421,12 +448,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include DenseMatrixViewExample_rowsReduction.out
+       * \include DenseMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on ALL matrix rows.
@@ -446,12 +473,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include DenseMatrixViewExample_allRowsReduction.out
+       * \include DenseMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on ALL matrix rows for constant instances.
@@ -471,12 +498,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include DenseMatrixViewExample_allRowsReduction.out
+       * \include DenseMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for constant instances.
@@ -484,8 +511,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \tparam Function is type of lambda function that will operate on matrix elements.
        *    It is should have form like
        *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
-       *  The column index repeats twice only for compatibility with sparse matrices. 
-       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  The column index repeats twice only for compatibility with sparse matrices.
+       *  If the 'compute' variable is set to false the iteration over the row can
        *  be interrupted.
        *
        * \param begin defines beginning of the range [begin,end) of rows to be processed.
@@ -498,7 +525,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \include DenseMatrixViewExample_forRows.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function ) const;
+      void forElements( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for non-constant instances.
@@ -506,8 +533,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \tparam Function is type of lambda function that will operate on matrix elements.
        *    It is should have form like
        *  `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, RealType& value, bool& compute )`.
-       *  The column index repeats twice only for compatibility with sparse matrices. 
-       *  If the 'compute' variable is set to false the iteration over the row can 
+       *  The column index repeats twice only for compatibility with sparse matrices.
+       *  If the 'compute' variable is set to false the iteration over the row can
        *  be interrupted.
        *
        * \param begin defines beginning of the range [begin,end) of rows to be processed.
@@ -520,7 +547,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \include DenseMatrixViewExample_forRows.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function );
+      void forElements( IndexType begin, IndexType end, Function&& function );
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
@@ -536,12 +563,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \include DenseMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function&& function ) const;
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
        *
-       * See \ref DenseMatrix::forEachElement.
+       * See \ref DenseMatrix::forAllElements.
        *
        * \tparam Function is a type of lambda function that will operate on matrix elements.
        * \param function  is an instance of the lambda function to be called in each row.
@@ -552,7 +579,107 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \include DenseMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) mutable { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrixView::RowViewType.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrixView::RowViewType.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) mutable { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrixView::RowViewType.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref DenseMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowViewType& row ) { ... };
+       * ```
+       *
+       * \e RowViewType represents matrix row - see \ref TNL::Matrices::DenseMatrixView::RowViewType.
+       *
+       * \par Example
+       * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include DenseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
@@ -569,7 +696,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param function is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForRows( IndexType begin, IndexType end, Function& function ) const;
+      void sequentialForRows( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for non-constant instances.
@@ -586,7 +713,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param function is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForRows( IndexType begin, IndexType end, Function& function );
+      void sequentialForRows( IndexType begin, IndexType end, Function&& function );
 
       /**
        * \brief This method calls \e sequentialForRows for all matrix rows (for constant instances).
@@ -597,7 +724,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param function  is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForAllRows( Function& function ) const;
+      void sequentialForAllRows( Function&& function ) const;
 
       /**
        * \brief This method calls \e sequentialForRows for all matrix rows.
@@ -608,7 +735,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
        * \param function  is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForAllRows( Function& function );
+      void sequentialForAllRows( Function&& function );
 
       /**
        * \brief Computes product of matrix and vector.
@@ -667,7 +794,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Assignment operator with DenseMatrix.
-       * 
+       *
        * \param matrix is the right-hand side matrix.
        * \return reference to this matrix.
        */
@@ -675,25 +802,25 @@ class DenseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Method for saving the matrix view to the file with given filename.
-       * 
+       *
        * The ouput file can be loaded by \ref DenseMatrix.
-       * 
+       *
        * \param fileName is name of the file.
        */
       void save( const String& fileName ) const;
 
       /**
        * \brief Method for saving the matrix view to a file.
-       * 
+       *
        * The ouput file can be loaded by \ref DenseMatrix.
-       * 
+       *
        * \param fileName is name of the file.
        */
       void save( File& file ) const;
 
       /**
        * \brief Method for printing the matrix to output stream.
-       * 
+       *
        * \param str is the output stream.
        */
       void print( std::ostream& str ) const;
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index 97e82af0e31937860a94466ff95d0837936c3b83..0bf262aa041523b298f1f207081847959da758e3 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -38,10 +38,22 @@ DenseMatrixView< Real, Device, Index, Organization >::
 DenseMatrixView( const IndexType rows,
                  const IndexType columns,
                  const ValuesViewType& values )
- : MatrixView< Real, Device, Index >( rows, columns, values )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), segments( rows, columns )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Value_ >
+__cuda_callable__
+DenseMatrixView< Real, Device, Index, Organization >::
+DenseMatrixView( const IndexType rows,
+                 const IndexType columns,
+                 const Containers::VectorView< Value_, Device, Index >& values )
+ : MatrixView< Real, Device, Index >( rows, columns, values ), segments( rows, columns, true )
 {
-   SegmentsType a( rows, columns );
-   segments = a.getView();
 }
 
 template< typename Real,
@@ -130,7 +142,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   this->reduceAllRows( fetch, std::plus<>{}, keep, 0 );
 }
 
 template< typename Real,
@@ -278,7 +290,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
 {
    auto values_view = this->values.getView();
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
@@ -295,7 +307,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
    const auto values_view = this->values.getConstView();
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
@@ -312,9 +324,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -324,9 +336,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -336,7 +348,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forElements( IndexType begin, IndexType end, Function& function ) const
+forElements( IndexType begin, IndexType end, Function&& function ) const
 {
    const auto values_view = this->values.getConstView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
@@ -352,7 +364,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forElements( IndexType begin, IndexType end, Function& function )
+forElements( IndexType begin, IndexType end, Function&& function )
 {
    auto values_view = this->values.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable {
@@ -368,7 +380,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function ) const
+forAllElements( Function&& function ) const
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -380,7 +392,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function )
+forAllElements( Function&& function )
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -392,10 +404,70 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-sequentialForRows( IndexType begin, IndexType end, Function& function ) const
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   auto values_view = this->values.getView();
+   using SegmentViewType = typename SegmentsViewType::SegmentViewType;
+   auto f = [=] __cuda_callable__ ( SegmentViewType& segmentView ) mutable {
+      auto rowView = RowView( segmentView, values_view );
+      function( rowView );
+   };
+   this->segments.forSegments( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   using SegmentViewType = typename SegmentsViewType::SegmentViewType;
+   auto f = [=] __cuda_callable__ ( SegmentViewType&& segmentView ) mutable {
+      const auto rowView = RowViewType( segmentView, values_view );
+      function( rowView );
+   };
+   this->segments.forSegments( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+DenseMatrixView< Real, Device, Index, Organization >::
+sequentialForRows( IndexType begin, IndexType end, Function&& function ) const
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
@@ -405,10 +477,10 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-sequentialForRows( IndexType begin, IndexType end, Function& function )
+sequentialForRows( IndexType begin, IndexType end, Function&& function )
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
@@ -418,7 +490,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-sequentialForAllRows( Function& function ) const
+sequentialForAllRows( Function&& function ) const
 {
    this->sequentialForRows( 0, this->getRows(), function );
 }
@@ -430,7 +502,7 @@ template< typename Real,
    template< typename Function >
 void
 DenseMatrixView< Real, Device, Index, Organization >::
-sequentialForAllRows( Function& function )
+sequentialForAllRows( Function&& function )
 {
    this->sequentialForRows( 0, this->getRows(), function );
 }
diff --git a/src/TNL/Matrices/LambdaMatrix.h b/src/TNL/Matrices/LambdaMatrix.h
index 56d1689891c4965bf6545cfe1849b63eba9c4190..511942f0195b1f8b0617d9dde6b5429c34cc4b25 100644
--- a/src/TNL/Matrices/LambdaMatrix.h
+++ b/src/TNL/Matrices/LambdaMatrix.h
@@ -13,6 +13,7 @@
 #include <type_traits>
 #include <TNL/String.h>
 #include <TNL/Devices/Host.h>
+#include <TNL/Matrices/LambdaMatrixRowView.h>
 
 namespace TNL {
 namespace Matrices {
@@ -25,24 +26,30 @@ namespace Matrices {
  *
  * \tparam MatrixElementsLambda is a lambda function returning matrix elements values and positions.
  *
- *    It has the following form:
+ * \tparam MatrixElementsLambda is a lambda function returning matrix elements values and positions.
+ *
+ * It has the following form:
  *
- *   `matrixElements( Index rows, Index columns, Index rowIdx, Index localIdx, Index& columnIdx, Real& value )`
+ * ```
+ * auto matrixElements = [] __cuda_callable__ ( Index rows, Index columns, Index rowIdx, Index localIdx, Index& columnIdx, Real& value ) { ... }
+ * ```
  *
  *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns, \e rowIdx is the index of matrix row being queried,
  *    \e localIdx is the rank of the non-zero element in given row, \e columnIdx is a column index of the matrix element computed by
  *    this lambda and \e value is a value of the matrix element computed by this lambda.
  * \tparam CompressedRowLengthsLambda is a lambda function returning a number of non-zero elements in each row.
  *
- *    It has the following form:
+ * It has the following form:
  *
- *    `rowLengths( Index rows, Index columns, Index rowIdx ) -> IndexType`
+ * ```
+ * auto rowLengths = [] __cuda_callable__ ( Index rows, Index columns, Index rowIdx ) -> IndexType { ...  }
+ * ```
  *
  *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns and \e rowIdx is an index of the row being queried.
  *
  * \tparam Real is a type of matrix elements values.
  * \tparam Device is a device on which the lambda functions will be evaluated.
- * \ẗparam Index is a type to be used for indexing.
+ * \tparam Index is a type to be used for indexing.
  */
 template< typename MatrixElementsLambda,
           typename CompressedRowLengthsLambda,
@@ -68,6 +75,26 @@ class LambdaMatrix
        */
       using IndexType = Index;
 
+      /**
+       * \brief Type of the lambda function returning the matrix elements.
+       */
+      using MatrixElementsLambdaType = MatrixElementsLambda;
+
+      /**
+       * \brief Type of the lambda function returning the number of non-zero elements in each row.
+       */
+      using CompressedRowLengthsLambdaType = CompressedRowLengthsLambda;
+
+      /**
+       * \brief Type of Lambda matrix row view.
+       */
+      using RowView = LambdaMatrixRowView< MatrixElementsLambdaType, CompressedRowLengthsLambdaType, RealType, IndexType >;
+
+      /**
+       * \brief Type of constant Lambda matrix row view.
+       */
+      using ConstRowView = RowView;
+
       static constexpr bool isSymmetric() { return false; };
       static constexpr bool isBinary() { return false; };
 
@@ -130,7 +157,7 @@ class LambdaMatrix
        * \param columns is the number of matrix columns.
        */
       void setDimensions( const IndexType& rows,
-                         const IndexType& columns );
+                          const IndexType& columns );
 
       /**
        * \brief Returns a number of matrix rows.
@@ -148,6 +175,22 @@ class LambdaMatrix
       __cuda_callable__
       IndexType getColumns() const;
 
+      /**
+       * \brief Get reference to the lambda function returning number of non-zero elements in each row.
+       *
+       * \return constant reference to CompressedRowLengthsLambda.
+       */
+      __cuda_callable__
+      const CompressedRowLengthsLambda& getCompressedRowLengthsLambda() const;
+
+      /**
+       * \brief Get reference to the lambda function returning the matrix elements values and column indexes.
+       *
+       * \return constant reference to MatrixElementsLambda.
+       */
+      __cuda_callable__
+      const MatrixElementsLambda& getMatrixElementsLambda() const;
+
       /**
        * \brief Compute capacities of all rows.
        *
@@ -185,67 +228,32 @@ class LambdaMatrix
       IndexType getNonzeroElementsCount() const;
 
       /**
-       * \brief Returns value of matrix element at position given by its row and column index.
+       * \brief Getter of simple structure for accessing given matrix row.
        *
-       * \param row is a row index of the matrix element.
-       * \param column i a column index of the matrix element.
+       * \param rowIdx is matrix row index.
        *
-       * \return value of given matrix element.
-       */
-      RealType getElement( const IndexType row,
-                           const IndexType column ) const;
-
-      /**
-       * \brief Method for performing general reduction on matrix rows.
-       *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
-       *
-       * \param begin defines beginning of the range [begin,end) of rows to be processed.
-       * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \return RowView for accessing given matrix row.
        *
        * \par Example
-       * \include Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp
        * \par Output
-       * \include LambdaMatrixExample_rowsReduction.out
+       * \include LambdaMatrixExample_getRow.out
+       *
+       * See \ref LambdaMatrixRowView.
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
       /**
-       * \brief Method for performing general reduction on ALL matrix rows.
-       *
-       * \tparam Fetch is a type of lambda function for data fetch declared as
-       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
-       *          The return type of this lambda can be any non void.
-       * \tparam Reduce is a type of lambda function for reduction declared as
-       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
-       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
-       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
-       * \tparam FetchValue is type returned by the Fetch lambda function.
+       * \brief Returns value of matrix element at position given by its row and column index.
        *
-       * \param fetch is an instance of lambda function for data fetch.
-       * \param reduce is an instance of lambda function for reduction.
-       * \param keep in an instance of lambda function for storing results.
-       * \param zero is zero of given reduction operation also known as idempotent element.
+       * \param row is a row index of the matrix element.
+       * \param column is a column index of the matrix element.
        *
-       * \par Example
-       * \include Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp
-       * \par Output
-       * \include LambdaMatrixExample_allRowsReduction.out
+       * \return value of given matrix element.
        */
-      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for constant instances.
@@ -283,7 +291,57 @@ class LambdaMatrix
        * \include LambdaMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref LambdaMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::LambdaMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref LambdaMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::LambdaMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
@@ -300,7 +358,7 @@ class LambdaMatrix
        * \param function is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForRows( IndexType begin, IndexType end, Function& function ) const;
+      void sequentialForRows( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
        * \brief This method calls \e sequentialForRows for all matrix rows (for constant instances).
@@ -311,7 +369,59 @@ class LambdaMatrix
        * \param function  is an instance of the lambda function to be called in each row.
        */
       template< typename Function >
-      void sequentialForAllRows( Function& function ) const;
+      void sequentialForAllRows( Function&& function ) const;
+
+      /**
+       * \brief Method for performing general reduction on matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       *
+       * \param first defines beginning of the range [first,last) of rows to be processed.
+       * \param last defines ending of the range [first,last) of rows to be processed.
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_reduceRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_reduceRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      /**
+       * \brief Method for performing general reduction on ALL matrix rows.
+       *
+       * \tparam Fetch is a type of lambda function for data fetch declared as
+       *          `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`.
+       *          The return type of this lambda can be any non void.
+       * \tparam Reduce is a type of lambda function for reduction declared as
+       *          `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`.
+       * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+       *          It is declared as `keep( const IndexType rowIdx, const double& value )`.
+       * \tparam FetchValue is type returned by the Fetch lambda function.
+       *
+       * \param fetch is an instance of lambda function for data fetch.
+       * \param reduce is an instance of lambda function for reduction.
+       * \param keep is an instance of lambda function for storing results.
+       * \param zero is zero of given reduction operation also known as idempotent element.
+       *
+       * \par Example
+       * \include Matrices/LambdaMatrix/LambdaMatrixExample_reduceAllRows.cpp
+       * \par Output
+       * \include LambdaMatrixExample_reduceAllRows.out
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Computes product of matrix and vector.
diff --git a/src/TNL/Matrices/LambdaMatrix.hpp b/src/TNL/Matrices/LambdaMatrix.hpp
index ee59799c5329ecf551ab46aefe923c1892de484d..f2cdb75749f828d57b91fe5219d236c96162e698 100644
--- a/src/TNL/Matrices/LambdaMatrix.hpp
+++ b/src/TNL/Matrices/LambdaMatrix.hpp
@@ -87,6 +87,32 @@ getColumns() const
    return this->columns;
 }
 
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const CompressedRowLengthsLambda&
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getCompressedRowLengthsLambda() const
+{
+   return this->compressedRowLengthsLambda;  // read-only reference to the row-lengths lambda stored in the matrix, no copy is made
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const MatrixElementsLambda&
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getMatrixElementsLambda() const
+{
+   return this->matrixElementsLambda;  // read-only reference to the matrix-elements lambda stored in the matrix, no copy is made
+}
+
 template< typename MatrixElementsLambda,
           typename CompressedRowLengthsLambda,
           typename Real,
@@ -119,7 +145,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   this->reduceAllRows( fetch, std::plus<>{}, keep, 0 );
 }
 
 template< typename MatrixElementsLambda,
@@ -171,6 +197,23 @@ getElement( const IndexType row,
    return valueView.getElement( 0 );
 }
 
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+auto
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   // Build the row view from the stored lambdas, the matrix dimensions and the requested row index.
+   return RowView( this->matrixElementsLambda, this->compressedRowLengthsLambda,
+                   this->getRows(),
+                   this->getColumns(),
+                   rowIdx );
+}
+
 template< typename MatrixElementsLambda,
           typename CompressedRowLengthsLambda,
           typename Real,
@@ -208,7 +251,7 @@ vectorProduct( const InVector& inVector,
    };
    if( ! end )
       end = this->getRows();
-   this->rowsReduction( begin, end, fetch, reduce, keep, 0.0 );
+   this->reduceRows( begin, end, fetch, reduce, keep, 0.0 );
 }
 
 template< typename MatrixElementsLambda,
@@ -219,7 +262,7 @@ template< typename MatrixElementsLambda,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceRows( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    using FetchType = decltype( fetch( IndexType(), IndexType(), RealType() ) );
 
@@ -253,9 +296,9 @@ template< typename MatrixElementsLambda,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename MatrixElementsLambda,
@@ -295,26 +338,40 @@ template< typename MatrixElementsLambda,
    template< typename Function >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
    forElements( 0, this->getRows(), function );
-   /*const IndexType rows = this->getRows();
-   const IndexType columns = this->getColumns();
-   auto rowLengths = this->compressedRowLengthsLambda;
-   auto matrixElements = this->matrixElementsLambda;
-   auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      const IndexType rowLength = rowLengths( rows, columns, rowIdx );
-      bool compute( true );
-      for( IndexType localIdx = 0; localIdx < rowLength && compute; localIdx++ )
-      {
-        IndexType elementColumn( 0 );
-        RealType elementValue( 0.0 );
-        matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue );
-        if( elementValue != 0.0 )
-            function( rowIdx, localIdx, elementColumn, elementValue, compute );
-      }
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Function >
+void
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto matrixCopy = *this;  // the lambda below captures this copy (and `function`) by value, not the `this` pointer
+   auto processRow = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto row = matrixCopy.getRow( rowIdx );  // named local: `function` receives the row view as an lvalue
+      function( row );
    };
-   Algorithms::ParallelFor< DeviceType >::exec( 0, this->getRows(), processRow );*/
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, processRow );  // one call per row index in [begin, end)
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+   template< typename Function >
+void
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+forAllRows( Function&& function ) const
+{
+   forRows( 0, this->getRows(), function );  // delegate with the full row range [0, getRows())
 }
 
 template< typename MatrixElementsLambda,
@@ -325,10 +382,10 @@ template< typename MatrixElementsLambda,
    template< typename Function >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-sequentialForRows( IndexType begin, IndexType end, Function& function ) const
+sequentialForRows( IndexType begin, IndexType end, Function&& function ) const
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename MatrixElementsLambda,
@@ -339,7 +396,7 @@ template< typename MatrixElementsLambda,
    template< typename Function >
 void
 LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
-sequentialForAllRows( Function& function ) const
+sequentialForAllRows( Function&& function ) const
 {
    sequentialForRows( 0, this->getRows(), function );
 }
diff --git a/src/TNL/Matrices/LambdaMatrixElement.h b/src/TNL/Matrices/LambdaMatrixElement.h
new file mode 100644
index 0000000000000000000000000000000000000000..57ba698f3674dada2aadd9fa7d5c62d83eb138dd
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrixElement.h
@@ -0,0 +1,65 @@
+/***************************************************************************
+                          LambdaMatrixElement.h -  description
+                             -------------------
+    begin                : Mar 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+
+namespace TNL {
+namespace Matrices {
+
+
+template< typename Real,
+          typename Index >
+class LambdaMatrixElement
+{
+   public:
+      // Immutable snapshot of one lambda-matrix element: its value, row index, column index and rank in the row.
+      using RealType = Real;
+
+      using IndexType = Index;
+
+      __cuda_callable__
+      LambdaMatrixElement( const RealType& value,
+                           const IndexType& rowIdx,
+                           const IndexType& columnIdx,
+                           const IndexType& localIdx )
+      : value_( value ), rowIdx( rowIdx ), columnIdx( columnIdx ), localIdx( localIdx ) {}
+
+      __cuda_callable__
+      LambdaMatrixElement( const LambdaMatrixElement& el ) = default;
+
+      __cuda_callable__
+      const RealType& value() const { return value_; }
+      // The accessors return references to members owned by this element, valid as long as the element lives.
+      __cuda_callable__
+      const IndexType& rowIndex() const { return rowIdx; }
+
+      __cuda_callable__
+      const IndexType& columnIndex() const { return columnIdx; }
+
+      __cuda_callable__
+      const IndexType& localIndex() const { return localIdx; }
+
+   protected:
+      // Everything is stored by value: reference members would dangle when the element is built from temporaries.
+      const RealType value_;
+
+      const IndexType rowIdx;
+
+      const IndexType columnIdx;
+
+      const IndexType localIdx;
+};
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/LambdaMatrixRowView.h b/src/TNL/Matrices/LambdaMatrixRowView.h
new file mode 100644
index 0000000000000000000000000000000000000000..b2e7bfaf87302d788ea0b5030061ac987bf56069
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrixRowView.h
@@ -0,0 +1,228 @@
+ /***************************************************************************
+                          LambdaMatrixRowView.h -  description
+                             -------------------
+    begin                : Mar 17, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Matrices/LambdaMatrixRowViewIterator.h>
+#include <TNL/Matrices/LambdaMatrixElement.h>
+
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief RowView is a simple structure for accessing rows of Lambda matrix.
+ *
+ * \tparam MatrixElementsLambda is a lambda function returning matrix elements values and positions.
+ *
+ * It has the following form:
+ *
+ * ```
+ * auto matrixElements = [] __cuda_callable__ ( Index rows, Index columns, Index rowIdx, Index localIdx, Index& columnIdx, Real& value ) { ... }
+ * ```
+ *
+ *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns, \e rowIdx is the index of matrix row being queried,
+ *    \e localIdx is the rank of the non-zero element in given row, \e columnIdx is a column index of the matrix element computed by
+ *    this lambda and \e value is a value of the matrix element computed by this lambda.
+ * \tparam CompressedRowLengthsLambda is a lambda function returning a number of non-zero elements in each row.
+ *
+ * It has the following form:
+ *
+ * ```
+ * auto rowLengths = [] __cuda_callable__ ( Index rows, Index columns, Index rowIdx ) -> IndexType { ... }
+ * ```
+ *
+ *    where \e rows is the number of matrix rows, \e columns is the number of matrix columns and \e rowIdx is an index of the row being queried.
+ *
+ * \tparam Real is a type of matrix elements values.
+ * \tparam Index is a type to be used for indexing.
+ *
+ * \par Example
+ * \include Matrices/LambdaMatrix/LambdaMatrixExample_getRow.cpp
+ * \par Output
+ * \include LambdaMatrixExample_getRow.out
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real = double,
+          typename Index = int >
+class LambdaMatrixRowView
+{
+   public:
+
+      /**
+       * \brief The type of matrix elements.
+       */
+      using RealType = Real;
+
+      /**
+       * \brief The type used for matrix elements indexing.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief Type of the lambda function returning the matrix elements.
+       */
+      using MatrixElementsLambdaType = MatrixElementsLambda;
+
+      /**
+       * \brief Type of the lambda function returning the number of non-zero elements in each row.
+       */
+      using CompressedRowLengthsLambdaType = CompressedRowLengthsLambda;
+
+      /**
+       * \brief Type of Lambda matrix row view.
+       */
+      using RowView = LambdaMatrixRowView< MatrixElementsLambdaType, CompressedRowLengthsLambdaType, RealType, IndexType >;
+
+      /**
+       * \brief Type of constant Lambda matrix row view.
+       */
+      using ConstRowView = RowView;
+
+      /**
+       * \brief The type of related matrix element.
+       */
+      using MatrixElementType = LambdaMatrixElement< RealType, IndexType >;
+
+      /**
+       * \brief Type of iterator for the matrix row.
+       */
+      using IteratorType = LambdaMatrixRowViewIterator< RowView >;
+
+      /**
+       * \brief Constructor with related lambda functions, matrix dimensions and row index.
+       *
+       * \param matrixElementsLambda is a constant reference to the lambda function evaluating matrix elements.
+       * \param compressedRowLengthsLambda is a constant reference to the lambda function returning the number of nonzero elements in each row.
+       * \param rows is number of matrix rows.
+       * \param columns is number of matrix columns.
+       * \param rowIdx is the matrix row index.
+       */
+      __cuda_callable__
+      LambdaMatrixRowView( const MatrixElementsLambdaType& matrixElementsLambda,
+                           const CompressedRowLengthsLambdaType& compressedRowLengthsLambda,
+                           const IndexType& rows,
+                           const IndexType& columns,
+                           const IndexType& rowIdx );
+
+      /**
+       * \brief Returns size of the matrix row, i.e. number of matrix elements in this row.
+       *
+       * \return Size of the matrix row.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /**
+       * \brief Returns the matrix row index.
+       *
+       * \return matrix row index.
+       */
+      __cuda_callable__
+      const IndexType& getRowIndex() const;
+
+      /**
+       * \brief Returns the column index of an element with given rank in the row.
+       *
+       * \param localIdx is the rank of the non-zero element in given row.
+       *
+       * \return the matrix element column index (returned by value).
+       */
+      __cuda_callable__
+      IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      /**
+       * \brief Returns the value of an element with given rank in the row.
+       *
+       * \param localIdx is the rank of the non-zero element in given row.
+       *
+       * \return the matrix element value (returned by value).
+       */
+      __cuda_callable__
+      RealType getValue( const IndexType localIdx ) const;
+
+      /**
+       * \brief Comparison of two matrix rows.
+       *
+       * The other matrix row can be from any other matrix.
+       *
+       * \param other is another matrix row.
+       * \return \e true if both rows are the same, \e false otherwise.
+       */
+      template< typename MatrixElementsLambda_,
+                typename CompressedRowLengthsLambda_,
+                typename Real_,
+                typename Index_ >
+      __cuda_callable__
+      bool operator==( const LambdaMatrixRowView< MatrixElementsLambda_, CompressedRowLengthsLambda_, Real_, Index_ >& other ) const;
+
+      /**
+       * \brief Returns non-constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType begin() const;
+
+      /**
+       * \brief Returns non-constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType end() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType cbegin() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType cend() const;
+
+
+   protected:
+
+      const MatrixElementsLambda& matrixElementsLambda;
+
+      const CompressedRowLengthsLambda& compressedRowLengthsLambda;
+
+      IndexType rows, columns, rowIdx;
+};
+
+/**
+ * \brief Insertion operator for a Lambda matrix row.
+ *
+ * \param str is an output stream.
+ * \param row is an input Lambda matrix row.
+ * \return  reference to the output stream.
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+std::ostream& operator<<( std::ostream& str, const LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >& row );
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/LambdaMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/LambdaMatrixRowView.hpp b/src/TNL/Matrices/LambdaMatrixRowView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2aec69b943efab48e3eee57a261fc851c083313b
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrixRowView.hpp
@@ -0,0 +1,178 @@
+/***************************************************************************
+                          LambdaMatrixRowView.hpp -  description
+                             -------------------
+    begin                : Mar 17, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/LambdaMatrixRowView.h>
+#include <TNL/Assert.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+LambdaMatrixRowView( const MatrixElementsLambdaType& matrixElementsLambda,
+                     const CompressedRowLengthsLambdaType& compressedRowLengthsLambda,
+                     const IndexType& rows,
+                     const IndexType& columns,
+                     const IndexType& rowIdx )
+ : matrixElementsLambda( matrixElementsLambda ),  // reference member: the lambda must outlive this view
+  compressedRowLengthsLambda( compressedRowLengthsLambda ),  // reference member as well
+  rows( rows ),  // the dimensions and the row index are copied into the view
+  columns( columns ),
+  rowIdx( rowIdx )
+{
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+getSize() const -> IndexType
+{
+   return compressedRowLengthsLambda( rows, columns, rowIdx );  // the row length is evaluated by the lambda on every call
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__
+auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+getRowIndex() const -> const IndexType&
+{
+   return rowIdx;  // reference to the index stored in this view
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+getColumnIndex( const IndexType localIdx ) const -> IndexType
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   RealType value{};       // value-initialized so the outputs are defined even if the lambda leaves one untouched
+   IndexType columnIdx{};  // filled in by the lambda for the element with rank localIdx
+   this->matrixElementsLambda( this->rows, this->columns, this->rowIdx, localIdx, columnIdx, value );
+   return columnIdx;  // the value computed alongside is discarded
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+getValue( const IndexType localIdx ) const -> RealType
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );
+   RealType value{};       // value-initialized so the outputs are defined even if the lambda leaves one untouched
+   IndexType columnIdx{};  // required by the lambda signature
+   this->matrixElementsLambda( this->rows, this->columns, this->rowIdx, localIdx, columnIdx, value );
+   return value;  // the column index computed alongside is discarded
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+   template< typename MatrixElementsLambda_,
+             typename CompressedRowLengthsLambda_,
+             typename Real_,
+             typename Index_ >
+__cuda_callable__
+bool
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+operator==( const LambdaMatrixRowView< MatrixElementsLambda_, CompressedRowLengthsLambda_, Real_, Index_ >& other ) const
+{
+   IndexType i = 0;
+   while( i < getSize() && i < other.getSize() ) {  // common prefix: column indices and values must both agree
+      if( getColumnIndex( i ) != other.getColumnIndex( i ) || getValue( i ) != other.getValue( i ) )
+         return false;
+      ++i;
+   }
+   for( IndexType j = i; j < getSize(); j++ )  // surplus entries of this row must have a negative column index
+      // TODO: use ... != getPaddingIndex()
+      if( getColumnIndex( j ) >= 0 )
+         return false;
+   for( IndexType j = i; j < other.getSize(); j++ )  // the same check for the surplus entries of the other row
+      // TODO: use ... != getPaddingIndex()
+      if( other.getColumnIndex( j ) >= 0 )
+         return false;
+   return true;
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+begin() const -> const IteratorType
+{
+   return IteratorType( *this, 0 );  // iterator over this row view positioned at local index 0
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+end() const -> const IteratorType
+{
+   return IteratorType( *this, getSize() );  // positioned one past the last local index of the row
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+cbegin() const -> const IteratorType
+{
+   return IteratorType( *this, 0 );  // same construction as begin(): local index 0 of this row view
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+__cuda_callable__ auto
+LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >::
+cend() const -> const IteratorType
+{
+   return IteratorType( *this, getSize() );  // same construction as end(): one past the last local index
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Index >
+std::ostream& operator<<( std::ostream& str, const LambdaMatrixRowView< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Index >& row )
+{
+   using NonConstIndex = std::remove_const_t< Index >;  // the row view declares IndexType as an alias of Index
+   for( NonConstIndex localIdx = 0; localIdx < row.getSize(); ++localIdx )  // prints " [ column ] = value, " per element
+      str << " [ " << row.getColumnIndex( localIdx ) << " ] = " << row.getValue( localIdx ) << ", ";
+   return str;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/LambdaMatrixRowViewIterator.h b/src/TNL/Matrices/LambdaMatrixRowViewIterator.h
new file mode 100644
index 0000000000000000000000000000000000000000..67b7253e895b7867fa40875488950206882f5505
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrixRowViewIterator.h
@@ -0,0 +1,98 @@
+ /***************************************************************************
+                          LambdaMatrixRowViewIterator.h -  description
+                             -------------------
+    begin                : Mar 21, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Matrices/LambdaMatrixElement.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename RowView >
+class LambdaMatrixRowViewIterator
+{
+   public:
+
+      /** \brief Type of the row view traversed by this iterator. */
+      using RowViewType = RowView;
+
+      /** \brief The type of matrix elements. */
+      using RealType = typename RowViewType::RealType;
+
+      /** \brief The type used for matrix elements indexing. */
+      using IndexType = typename RowViewType::IndexType;
+
+      /** \brief The type of related matrix element. */
+      using MatrixElementType = typename RowView::MatrixElementType;
+
+      /**
+       * \brief Tells whether the parent matrix is a binary matrix.
+       * @return `true` if the matrix is binary.
+       */
+      static constexpr bool isBinary() { return RowViewType::isBinary(); }
+
+      /**
+       * \brief Creates an iterator over \e rowView positioned at \e localIdx.
+       * \param rowView is the row view to iterate over; only a reference to it is stored.
+       * \param localIdx is the initial position within the row.
+       */
+      __cuda_callable__
+      LambdaMatrixRowViewIterator( const RowViewType& rowView,
+                                   const IndexType& localIdx );
+
+      /**
+       * \brief Comparison of two matrix row iterators.
+       *
+       * \param other is another matrix row iterator.
+       * \return \e true if both iterators point at the same position of the same row view object, \e false otherwise.
+       */
+      __cuda_callable__
+      bool operator==( const LambdaMatrixRowViewIterator& other ) const;
+
+      /**
+       * \brief Comparison of two matrix row iterators.
+       *
+       * \param other is another matrix row iterator.
+       * \return \e false if both iterators point at the same position of the same row view object, \e true otherwise.
+       */
+      __cuda_callable__
+      bool operator!=( const LambdaMatrixRowViewIterator& other ) const;
+
+      /** \brief Moves to the next element; does nothing when the position already equals the row size. */
+      __cuda_callable__
+      LambdaMatrixRowViewIterator& operator++();
+
+      /** \brief Moves to the previous element; does nothing when the position is already 0. */
+      __cuda_callable__
+      LambdaMatrixRowViewIterator& operator--();
+
+      /** \brief Returns (by value) the matrix element at the current position. */
+      __cuda_callable__
+      MatrixElementType operator*();
+
+      /** \brief Returns (by value) the matrix element at the current position, constant variant. */
+      __cuda_callable__
+      const MatrixElementType operator*() const;
+
+   protected:
+
+      const RowViewType& rowView;
+
+      IndexType localIdx = 0;
+};
+
+
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/LambdaMatrixRowViewIterator.hpp>
diff --git a/src/TNL/Matrices/LambdaMatrixRowViewIterator.hpp b/src/TNL/Matrices/LambdaMatrixRowViewIterator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..13ff5efcae0fe7068b9d13618506186bc5065461
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrixRowViewIterator.hpp
@@ -0,0 +1,95 @@
+/***************************************************************************
+                          LambdaMatrixRowViewIterator.hpp -  description
+                             -------------------
+    begin                : Mar 21, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/LambdaMatrixRowView.h>
+#include <TNL/Assert.h>
+
+namespace TNL {
+namespace Matrices {
+
+// Binds the iterator to `rowView` (kept as a const reference) and places it at `localIdx`.
+template< typename RowView >
+__cuda_callable__
+LambdaMatrixRowViewIterator< RowView >::
+LambdaMatrixRowViewIterator( const RowViewType& rowView, const IndexType& localIdx )
+: rowView( rowView ),
+  localIdx( localIdx )
+{}
+
+// Two iterators are equal when they refer to the very same row view object
+// (address comparison) and stand at the same local index.
+template< typename RowView >
+__cuda_callable__
+bool
+LambdaMatrixRowViewIterator< RowView >::
+operator==( const LambdaMatrixRowViewIterator& other ) const
+{
+   return &this->rowView == &other.rowView && this->localIdx == other.localIdx;
+}
+
+// Inequality is the negation of operator==.
+template< typename RowView >
+__cuda_callable__ bool LambdaMatrixRowViewIterator< RowView >::operator!=( const LambdaMatrixRowViewIterator& other ) const
+{
+   const bool same = ( other == *this );
+   return ! same;
+}
+
+// Pre-increment: steps to the next element unless the position already equals rowView.getSize().
+template< typename RowView >
+__cuda_callable__ auto
+LambdaMatrixRowViewIterator< RowView >::
+operator++() -> LambdaMatrixRowViewIterator&
+{
+   const bool canAdvance = this->localIdx < this->rowView.getSize();
+   if( canAdvance ) ++this->localIdx;
+   return *this;
+}
+
+// Pre-decrement: steps one element back unless the iterator already stands at local index 0.
+template< typename RowView >
+__cuda_callable__ auto
+LambdaMatrixRowViewIterator< RowView >::
+operator--() -> LambdaMatrixRowViewIterator&
+{
+   const bool canStepBack = this->localIdx > 0;
+   if( canStepBack ) --this->localIdx;
+   return *this;
+}
+
+// Dereference: builds the element descriptor (value, row index, column index, local index)
+// for the current position; the element is returned by value.
+template< typename RowView >
+__cuda_callable__
+typename LambdaMatrixRowViewIterator< RowView >::MatrixElementType
+LambdaMatrixRowViewIterator< RowView >::
+operator*()
+{
+   return MatrixElementType( rowView.getValue( localIdx ), rowView.getRowIndex(),
+                             rowView.getColumnIndex( localIdx ), localIdx );
+}
+
+// Constant dereference: builds the element descriptor (value, row index, column index, local index)
+// for the current position; the element is returned by value.
+template< typename RowView >
+__cuda_callable__
+const typename LambdaMatrixRowViewIterator< RowView >::MatrixElementType
+LambdaMatrixRowViewIterator< RowView >::
+operator*() const
+{
+   return MatrixElementType( rowView.getValue( localIdx ), rowView.getRowIndex(),
+                             rowView.getColumnIndex( localIdx ), localIdx );
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 702e79162d9c6984892df845811ef10e4cdff59f..1156a897e330cbb2837fb6e6eb6e793835b34cbe 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -37,11 +37,10 @@ using Algorithms::Segments::ElementsOrganization;
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
-          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
+          typename RealAllocator = typename Allocators::Default< Device >::template Allocator< std::remove_const_t< Real > > >
 class Matrix : public Object
 {
    public:
-      using ValuesVectorType = Containers::Vector< Real, Device, Index, RealAllocator >;
       using RealAllocatorType = RealAllocator;
       using RowsCapacitiesType = Containers::Vector< Index, Device, Index >;
       using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >;
@@ -72,7 +71,27 @@ class Matrix : public Object
        * \brief Type of base matrix view for constant instances.
        *
        */
-      using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
+      using ConstViewType = typename MatrixView< Real, Device, Index >::ConstViewType;
+
+      /**
+       * \brief Type of vector holding values of matrix elements.
+       */
+      using ValuesType = Containers::Vector< Real, Device, Index, RealAllocator >;
+
+      /**
+       * \brief Type of constant vector holding values of matrix elements.
+       */
+      using ConstValuesType = Containers::Vector< std::add_const_t< Real >, Device, Index, RealAllocator >;
+
+      /**
+       * \brief Type of vector view holding values of matrix elements.
+       */
+      using ValuesView = typename ViewType::ValuesView;
+
+      /**
+       * \brief Type of constant vector view holding values of matrix elements.
+       */
+      using ConstValuesView = typename ViewType::ConstValuesView;
 
       /**
        * \brief Construct a new Matrix object possibly with user defined allocator of the matrix values.
@@ -155,14 +174,14 @@ class Matrix : public Object
        *
        * \return constant reference to a vector with the matrix elements values.
        */
-      const ValuesVectorType& getValues() const;
+      const ValuesType& getValues() const;
 
       /**
        * \brief Returns a reference to a vector with the matrix elements values.
        *
        * \return constant reference to a vector with the matrix elements values.
        */
-      ValuesVectorType& getValues();
+      ValuesType& getValues();
 
       /**
        * \brief Comparison operator with another arbitrary matrix type.
@@ -220,7 +239,7 @@ class Matrix : public Object
       // TODO: remove
       //IndexType numberOfColors;
 
-      ValuesVectorType values;
+      ValuesType values;
 };
 
 /**
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index 57c79cd769704d6ddf576167cafe9d1b9b56eb26..c715f4dc37de0b2baf3f6abb2a7a1f2ec124c020 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -112,9 +112,9 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+auto
 Matrix< Real, Device, Index, RealAllocator >::
-getValues() const
+getValues() const -> const ValuesType&
 {
    return this->values;
 }
@@ -123,9 +123,9 @@ template< typename Real,
           typename Device,
           typename Index,
           typename RealAllocator >
-typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType&
+auto
 Matrix< Real, Device, Index, RealAllocator >::
-getValues()
+getValues() -> ValuesType&
 {
    return this->values;
 }
diff --git a/src/TNL/Matrices/MatrixRowViewIterator.h b/src/TNL/Matrices/MatrixRowViewIterator.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf99bea295f56948226ace980ba1e0019bf90756
--- /dev/null
+++ b/src/TNL/Matrices/MatrixRowViewIterator.h
@@ -0,0 +1,98 @@
+ /***************************************************************************
+                          MatrixRowViewIterator.h -  description
+                             -------------------
+    begin                : Mar 20, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Matrices/SparseMatrixElement.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename RowView >
+class MatrixRowViewIterator
+{
+   public:
+
+      /** \brief Type of the row view traversed by this iterator. */
+      using RowViewType = RowView;
+
+      /** \brief The type of matrix elements. */
+      using RealType = typename RowViewType::RealType;
+
+      /** \brief The type used for matrix elements indexing. */
+      using IndexType = typename RowViewType::IndexType;
+
+      /** \brief The type of related matrix element. */
+      using MatrixElementType = typename RowView::MatrixElementType;
+
+      /**
+       * \brief Tells whether the parent matrix is a binary matrix.
+       * @return `true` if the matrix is binary.
+       */
+      static constexpr bool isBinary() { return RowViewType::isBinary(); }
+
+      /**
+       * \brief Creates an iterator over \e rowView positioned at \e localIdx.
+       * \param rowView is the row view to iterate over; only a (non-const) reference to it is stored.
+       * \param localIdx is the initial position within the row.
+       */
+      __cuda_callable__
+      MatrixRowViewIterator( RowViewType& rowView,
+                             const IndexType& localIdx );
+
+      /**
+       * \brief Comparison of two matrix row iterators.
+       *
+       * \param other is another matrix row iterator.
+       * \return \e true if both iterators point at the same position of the same row view object, \e false otherwise.
+       */
+      __cuda_callable__
+      bool operator==( const MatrixRowViewIterator& other ) const;
+
+      /**
+       * \brief Comparison of two matrix row iterators.
+       *
+       * \param other is another matrix row iterator.
+       * \return \e false if both iterators point at the same position of the same row view object, \e true otherwise.
+       */
+      __cuda_callable__
+      bool operator!=( const MatrixRowViewIterator& other ) const;
+
+      /** \brief Moves to the next element; does nothing when the position already equals the row size. */
+      __cuda_callable__
+      MatrixRowViewIterator& operator++();
+
+      /** \brief Moves to the previous element; does nothing when the position is already 0. */
+      __cuda_callable__
+      MatrixRowViewIterator& operator--();
+
+      /** \brief Returns (by value) the matrix element at the current position. */
+      __cuda_callable__
+      MatrixElementType operator*();
+
+      /** \brief Returns (by value) the matrix element at the current position, constant variant. */
+      __cuda_callable__
+      const MatrixElementType operator*() const;
+
+   protected:
+
+      RowViewType& rowView;
+
+      IndexType localIdx = 0;
+};
+
+
+   } // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MatrixRowViewIterator.hpp>
diff --git a/src/TNL/Matrices/MatrixRowViewIterator.hpp b/src/TNL/Matrices/MatrixRowViewIterator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7b233e47b07cc3acde2b41892966c92b8c49ec20
--- /dev/null
+++ b/src/TNL/Matrices/MatrixRowViewIterator.hpp
@@ -0,0 +1,95 @@
+/***************************************************************************
+                          MatrixRowViewIterator.hpp -  description
+                             -------------------
+    begin                : Mar 20, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/SparseMatrixRowView.h>
+#include <TNL/Assert.h>
+
+namespace TNL {
+namespace Matrices {
+
+// Binds the iterator to `rowView` (kept as a non-const reference) and places it at `localIdx`.
+template< typename RowView >
+__cuda_callable__
+MatrixRowViewIterator< RowView >::
+MatrixRowViewIterator( RowViewType& rowView, const IndexType& localIdx )
+: rowView( rowView ),
+  localIdx( localIdx )
+{}
+
+// Two iterators are equal when they refer to the very same row view object
+// (address comparison) and stand at the same local index.
+template< typename RowView >
+__cuda_callable__
+bool
+MatrixRowViewIterator< RowView >::
+operator==( const MatrixRowViewIterator& other ) const
+{
+   return &this->rowView == &other.rowView && this->localIdx == other.localIdx;
+}
+
+// Inequality is the negation of operator==.
+template< typename RowView >
+__cuda_callable__ bool MatrixRowViewIterator< RowView >::operator!=( const MatrixRowViewIterator& other ) const
+{
+   const bool same = ( other == *this );
+   return ! same;
+}
+
+// Pre-increment: steps to the next element unless the position already equals rowView.getSize().
+template< typename RowView >
+__cuda_callable__ auto
+MatrixRowViewIterator< RowView >::
+operator++() -> MatrixRowViewIterator&
+{
+   const bool canAdvance = this->localIdx < this->rowView.getSize();
+   if( canAdvance ) ++this->localIdx;
+   return *this;
+}
+
+// Pre-decrement: steps one element back unless the iterator already stands at local index 0.
+template< typename RowView >
+__cuda_callable__ auto
+MatrixRowViewIterator< RowView >::
+operator--() -> MatrixRowViewIterator&
+{
+   const bool canStepBack = this->localIdx > 0;
+   if( canStepBack ) --this->localIdx;
+   return *this;
+}
+
+// Dereference: builds the element descriptor (value, row index, column index, local index)
+// for the current position; the element is returned by value.
+template< typename RowView >
+__cuda_callable__
+typename MatrixRowViewIterator< RowView >::MatrixElementType
+MatrixRowViewIterator< RowView >::
+operator*()
+{
+   return MatrixElementType( rowView.getValue( localIdx ), rowView.getRowIndex(),
+                             rowView.getColumnIndex( localIdx ), localIdx );
+}
+
+// Constant dereference: builds the element descriptor (value, row index, column index, local index)
+// for the current position; the element is returned by value.
+template< typename RowView >
+__cuda_callable__
+const typename MatrixRowViewIterator< RowView >::MatrixElementType
+MatrixRowViewIterator< RowView >::
+operator*() const
+{
+   return MatrixElementType( rowView.getValue( localIdx ), rowView.getRowIndex(),
+                             rowView.getColumnIndex( localIdx ), localIdx );
+}
+
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
index 7d8d9102d107864394e2ad29fc14b957e57aec18..6e4b89da365409989933c76b510720c804b80878 100644
--- a/src/TNL/Matrices/MatrixView.h
+++ b/src/TNL/Matrices/MatrixView.h
@@ -38,7 +38,6 @@ class MatrixView : public Object
       using RowsCapacitiesType = Containers::Vector< Index, Device, Index >;
       using RowsCapacitiesTypeView = Containers::VectorView< Index, Device, Index >;
       using ConstRowsCapacitiesTypeView = typename RowsCapacitiesTypeView::ConstViewType;
-      using ValuesView = Containers::VectorView< Real, Device, Index >;
 
       /**
        * \brief The type of matrix elements.
@@ -59,16 +58,27 @@ class MatrixView : public Object
        * \brief Type of base matrix view.
        *
        */
-      using ViewType = MatrixView< typename std::remove_const< Real >::type, Device, Index >;
+      using ViewType = MatrixView< Real, Device, Index >;
 
       /**
        * \brief Type of base matrix view for constant instances.
        *
        */
-      using ConstViewType = MatrixView< typename std::add_const< Real >::type, Device, Index >;
+      using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
+
+      /**
+       * \brief Type of vector view holding values of matrix elements.
+       */
+      using ValuesView = Containers::VectorView< Real, Device, Index >;
+
+      /**
+       * \brief Type of constant vector view holding values of matrix elements.
+       */
+      using ConstValuesView = typename ValuesView::ConstViewType;
+
 
       /**
-       * \brief Basic construtor with no parameters.
+       * \brief Basic constructor with no parameters.
        */
       __cuda_callable__
       MatrixView();
diff --git a/src/TNL/Matrices/MatrixWriter.hpp b/src/TNL/Matrices/MatrixWriter.hpp
index 97310c19ed071793ed1c274cbb32abb31da4c45e..77dc5c250860f3eef4f9f6ccd3e251ac9d6fe9ce 100644
--- a/src/TNL/Matrices/MatrixWriter.hpp
+++ b/src/TNL/Matrices/MatrixWriter.hpp
@@ -152,12 +152,18 @@ writeMtx( std::ostream& str,
    str << std::setw( 9 ) << matrix.getRows() << " " << std::setw( 9 ) << matrix.getColumns() << " " << std::setw( 12 ) << matrix.getNonzeroElementsCount() << std::endl;
    std::ostream* str_ptr = &str;
    auto cout_ptr = &std::cout;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, RealType value, bool& compute ) mutable {
-      if( value != 0 )
+   auto f = [=] __cuda_callable__ ( const typename Matrix::ConstRowView& row ) mutable {
+      auto rowIdx = row.getRowIndex();
+      for( IndexType localIdx = 0; localIdx < row.getSize(); localIdx++ )
       {
-         *str_ptr << std::setw( 9 ) << rowIdx + 1 << std::setw( 9 ) << columnIdx + 1 << std::setw( 12 ) << value << std::endl;
-         if( verbose )
-            *cout_ptr << "Drawing the row " << rowIdx << "      \r" << std::flush;
+         IndexType columnIdx = row.getColumnIndex( localIdx );
+         RealType value = row.getValue( localIdx );
+         if( value != 0 )
+         {
+            *str_ptr << std::setw( 9 ) << rowIdx + 1 << std::setw( 9 ) << columnIdx + 1 << std::setw( 12 ) << value << std::endl;
+            if( verbose )
+               *cout_ptr << "Drawing the row " << rowIdx << "      \r" << std::flush;
+         }
       }
    };
    matrix.sequentialForAllRows( f );
diff --git a/src/TNL/Matrices/MultidiagonalMatrix.h b/src/TNL/Matrices/MultidiagonalMatrix.h
index 4c07354cd0fcccc6c15c166fad81d417c509b1b4..e29796a1e12a544334a5a786e0896ef7fe4a83fc 100644
--- a/src/TNL/Matrices/MultidiagonalMatrix.h
+++ b/src/TNL/Matrices/MultidiagonalMatrix.h
@@ -77,8 +77,8 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       // Supporting types - they are not important for the user
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
-      using ValuesViewType = typename ValuesVectorType::ViewType;
+      using ValuesType = typename BaseType::ValuesType;
+      using ValuesView = typename ValuesType::ViewType;
       using IndexerType = details::MultidiagonalMatrixIndexer< Index, Organization >;
       using DiagonalsOffsetsType = Containers::Vector< Index, Device, Index, IndexAllocator >;
       using DiagonalsOffsetsView = typename DiagonalsOffsetsType::ViewType;
@@ -134,12 +134,12 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
       /**
        * \brief Type for accessing matrix rows.
        */
-      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >;
+      using RowView = typename ViewType::RowView;
 
       /**
        * \brief Type for accessing constant matrix rows.
        */
-      using ConstRowView = typename RowView::ConstViewType;
+      using ConstRowView = typename ViewType::ConstRowView;
 
       /**
        * \brief Helper type for getting self type or its modifications.
@@ -509,7 +509,7 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * See \ref MultidiagonalMatrixRowView.
        */
       __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
+      const ConstRowView getRow( const IndexType& rowIdx ) const;
 
       /**
        * \brief Set all matrix elements to given value.
@@ -526,7 +526,7 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -551,7 +551,7 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -580,7 +580,7 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -617,12 +617,12 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixExample_rowsReduction.out
+       * \include MultidiagonalMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on matrix rows for constant instances.
@@ -644,12 +644,12 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixExample_rowsReduction.out
+       * \include MultidiagonalMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -669,12 +669,12 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixExample_allRowsReduction.out
+       * \include MultidiagonalMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows for constant instances.
@@ -694,12 +694,12 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixExample_allRowsReduction.out
+       * \include MultidiagonalMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for iteration over matrix rows for constant instances.
@@ -783,7 +783,7 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include MultidiagonalMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
@@ -799,7 +799,107 @@ class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include MultidiagonalMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/MultidiagonalMatrix.hpp b/src/TNL/Matrices/MultidiagonalMatrix.hpp
index 2a7704fc436ba2bfb9c2a3d7ad0b1919ea47a713..7e6ac450f54fc50e772e51a18bd2a8fb98e406e0 100644
--- a/src/TNL/Matrices/MultidiagonalMatrix.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrix.hpp
@@ -404,7 +404,7 @@ template< typename Real,
 __cuda_callable__
 auto
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-getRow( const IndexType& rowIdx ) const -> const RowView
+getRow( const IndexType& rowIdx ) const -> const ConstRowView
 {
    return this->view.getRow( rowIdx );
 }
@@ -477,9 +477,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.reduceRows( first, last, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -491,9 +491,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.reduceRows( first, last, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -505,9 +505,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->view.reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -519,9 +519,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->view.reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -561,7 +561,7 @@ template< typename Real,
    template< typename Function >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
    this->view.forElements( 0, this->getRows(), function );
 }
@@ -575,11 +575,67 @@ template< typename Real,
    template< typename Function >
 void
 MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
-forEachElement( Function& function )
+forAllElements( Function& function )
 {
    this->view.forElements( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   this->getView().forRows( begin, end, function ); // work is done by forRows of the object returned by getView(); function is passed on as an lvalue
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   this->getConstView().forRows( begin, end, function ); // constant overload: work is done by forRows of the object returned by getConstView()
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forAllRows( Function&& function )
+{
+   this->getView().forAllRows( function ); // work is done by forAllRows of the object returned by getView(); function is passed on as an lvalue
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::
+forAllRows( Function&& function ) const
+{
+   this->getConstView().forAllRows( function ); // constant overload: iterate all rows through the constant view, same accessor as forRows() const
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -820,7 +876,7 @@ operator=( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, Rea
          auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
-         this->forEachElement( f );
+         this->forAllElements( f );
       }
       else
       {
diff --git a/src/TNL/Matrices/MultidiagonalMatrixElement.h b/src/TNL/Matrices/MultidiagonalMatrixElement.h
new file mode 100644
index 0000000000000000000000000000000000000000..3672526eabd584f7622f2743175d45068e1d7f9d
--- /dev/null
+++ b/src/TNL/Matrices/MultidiagonalMatrixElement.h
@@ -0,0 +1,109 @@
+/***************************************************************************
+                          MultidiagonalMatrixElement.h -  description
+                             -------------------
+    begin                : Mar 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief Accessor for a single element of a multidiagonal matrix.
+ *
+ * The object keeps references to the element value, the row index and the
+ * local index passed to the constructor, and its own copy of the column index.
+ * It must therefore not outlive the objects given to the constructor.
+ *
+ * \tparam Real is a type of the matrix element value.
+ * \tparam Index is a type used for indexing.
+ */
+template< typename Real,
+          typename Index >
+class MultidiagonalMatrixElement
+{
+   public:
+
+      /**
+       * \brief Type of the matrix element value.
+       */
+      using RealType = Real;
+
+      /**
+       * \brief Type used for the row, column and local indexes.
+       */
+      using IndexType = Index;
+
+      /**
+       * \brief Constructor binding the element to its value and indexes.
+       *
+       * \param value is a reference to the element value; it is stored as a reference.
+       * \param rowIdx is the row index; it is stored as a reference.
+       * \param columnIdx is the column index; it is copied.
+       * \param localIdx is the local index of the element; it is stored as a reference.
+       */
+      __cuda_callable__
+      MultidiagonalMatrixElement( RealType& value,
+                                  const IndexType& rowIdx,
+                                  const IndexType& columnIdx,
+                                  const IndexType& localIdx )
+      : value_( value ), rowIdx( rowIdx ), columnIdx( columnIdx ), localIdx( localIdx ) {}
+
+      /**
+       * \brief Returns non-constant reference to the element value.
+       */
+      __cuda_callable__
+      RealType& value() { return value_; }
+
+      /**
+       * \brief Returns constant reference to the element value.
+       */
+      __cuda_callable__
+      const RealType& value() const { return value_; }
+
+      /**
+       * \brief Returns the row index given to the constructor.
+       */
+      __cuda_callable__
+      const IndexType& rowIndex() const { return rowIdx; }
+
+      /**
+       * \brief Returns non-constant reference to the stored copy of the column index.
+       */
+      __cuda_callable__
+      IndexType& columnIndex() { return columnIdx; }
+
+      /**
+       * \brief Returns constant reference to the stored copy of the column index.
+       */
+      __cuda_callable__
+      const IndexType& columnIndex() const { return columnIdx; }
+
+      /**
+       * \brief Returns the local index given to the constructor.
+       */
+      __cuda_callable__
+      const IndexType& localIndex() const { return localIdx; }
+
+   protected:
+
+      RealType& value_;
+
+      const IndexType& rowIdx;
+
+      IndexType columnIdx;
+
+      const IndexType& localIdx;
+};
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
index 6c89ffc1b42454b03c479986ccd4e4a8c94fba38..dcfcfe55ce9388649710cf16d8a2d164fa555858 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h
@@ -10,25 +10,28 @@
 
 #pragma once
 
+#include <TNL/Matrices/MultidiagonalMatrixElement.h>
+#include <TNL/Matrices/MatrixRowViewIterator.h>
+
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 /**
  * \brief RowView is a simple structure for accessing rows of multidiagonal matrix.
- * 
+ *
  * \tparam ValuesView is a vector view storing the matrix elements values.
  * \tparam Indexer is type of object responsible for indexing and organization of
  *    matrix elements.
  * \tparam DiagonalsOffsetsView_ is a container view holding offsets of
  *    diagonals of multidiagonal matrix.
- * 
+ *
  * See \ref MultidiagonalMatrix and \ref MultidiagonalMatrixView.
- * 
+ *
  * \par Example
  * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp
  * \par Output
- * \include MultidiagonalatrixExample_getRow.out
- * 
+ * \include MultidiagonalMatrixExample_getRow.out
+ *
  * \par Example
  * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp
  * \par Output
@@ -76,21 +79,36 @@ class MultidiagonalMatrixRowView
       /**
        * \brief Type of constant container view used for storing the column indexes of the matrix elements.
        */
-      using ConstDiagonalsOffsetsViewType = typename DiagonalsOffsetsView::ConstViewType;
+      using ConstDiagonalsOffsetsView = typename DiagonalsOffsetsView::ConstViewType;
 
       /**
        * \brief Type of constant indexer view.
        */
-      using ConstIndexerViewType = typename Indexer::ConstType;
+      using ConstIndexerViewType = typename IndexerType::ConstType;
+
+      /**
+       * \brief Type of multidiagonal matrix row view (non-constant counterpart of ConstRowView).
+       */
+      using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >;
 
       /**
        * \brief Type of constant sparse matrix row view.
        */
-      using ConstViewType = MultidiagonalMatrixRowView< ConstValuesViewType, ConstIndexerViewType, ConstDiagonalsOffsetsViewType >;
+      using ConstRowView = MultidiagonalMatrixRowView< ConstValuesViewType, ConstIndexerViewType, ConstDiagonalsOffsetsView >;
+
+      /**
+       * \brief The type of related matrix element.
+       */
+      using MatrixElementType = MultidiagonalMatrixElement< RealType, IndexType >;
+
+      /**
+       * \brief Type of iterator for the matrix row.
+       */
+      using IteratorType = MatrixRowViewIterator< RowView >;
 
       /**
        * \brief Constructor with all necessary data.
-       * 
+       *
        * \param rowIdx is index of the matrix row this RowView refer to.
        * \param diagonalsOffsets is a vector view holding offsets of matrix diagonals,
        * \param values is a vector view holding values of matrix elements.
@@ -104,17 +122,25 @@ class MultidiagonalMatrixRowView
 
       /**
        * \brief Returns number of diagonals of the multidiagonal matrix.
-       * 
+       *
        * \return number of diagonals of the multidiagonal matrix.
        */
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Returns the matrix row index.
+       *
+       * \return matrix row index.
+       */
+      __cuda_callable__
+      const IndexType& getRowIndex() const;
+
       /**
        * \brief Computes column index of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return column index of matrix element on given subdiagonal.
        */
       __cuda_callable__
@@ -122,9 +148,9 @@ class MultidiagonalMatrixRowView
 
       /**
        * \brief Returns value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return constant reference to matrix element value.
        */
       __cuda_callable__
@@ -132,9 +158,9 @@ class MultidiagonalMatrixRowView
 
       /**
        * \brief Returns value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return non-constant reference to matrix element value.
        */
       __cuda_callable__
@@ -142,13 +168,46 @@ class MultidiagonalMatrixRowView
 
       /**
        * \brief Changes value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the matrix subdiagonal.
        * \param value is the new value of the matrix element.
        */
       __cuda_callable__
       void setElement( const IndexType localIdx,
                        const RealType& value );
+
+      /**
+       * \brief Returns iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      IteratorType begin();
+
+      /**
+       * \brief Returns iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      IteratorType end();
+
+      /**
+       * \brief Returns constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType cbegin() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType cend() const;
+
    protected:
 
       IndexType rowIdx;
diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
index 37cdd455a9838ac5e67ded80009a44bf4dd05796..0f43c6ad8b8dfcdaee85da24e6192ed3e8c05395 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp
@@ -11,7 +11,7 @@
 #pragma once
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
@@ -33,6 +33,16 @@ getSize() const -> IndexType
    return diagonalsOffsets.getSize();//indexer.getRowSize( rowIdx );
 }
 
+
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
+getRowIndex() const -> const IndexType&
+{
+   return this->rowIdx; // reference to the rowIdx member, so it is valid only while this row view exists
+}
+
 template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
 auto
@@ -64,7 +74,7 @@ getValue( const IndexType localIdx ) -> RealType&
 
 template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
 __cuda_callable__
-void 
+void
 MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
 setElement( const IndexType localIdx,
             const RealType& value )
@@ -72,5 +82,41 @@ setElement( const IndexType localIdx,
    this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
 }
 
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
+begin() -> IteratorType
+{
+   return IteratorType( *this, 0 ); // iterator over this row built with position 0 (presumably a local index - confirm in MatrixRowViewIterator)
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
+end() -> IteratorType
+{
+   return IteratorType( *this, this->getSize() ); // iterator over this row built with position getSize(), i.e. the value getSize() returns for this row view
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
+cbegin() const -> const IteratorType
+{
+   return IteratorType( *this, 0 ); // NOTE(review): *this is const here while IteratorType is MatrixRowViewIterator< RowView > - confirm its constructor accepts a const row view
+}
+
+template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView >
+__cuda_callable__
+auto
+MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >::
+cend() const -> const IteratorType
+{
+   return IteratorType( *this, this->getSize() ); // NOTE(review): *this is const here while IteratorType is MatrixRowViewIterator< RowView > - confirm its constructor accepts a const row view
+}
+
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h
index a66431b18b846c6a95fff68659639895608b3f77..bc3de664b9142bd9ad1648a8b3bfa0e5fb7bd94b 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.h
@@ -78,6 +78,11 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        */
       using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >;
 
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
+      using ConstRowView = typename RowView::ConstRowView;
+
       /**
        * \brief Helper type for getting self type or its modifications.
        */
@@ -268,7 +273,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * See \ref MultidiagonalMatrixRowView.
        */
       __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
+      const ConstRowView getRow( const IndexType& rowIdx ) const;
 
       /**
        * \brief Set all matrix elements to given value.
@@ -285,7 +290,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -310,7 +315,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -338,7 +343,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow
-       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forEachElement.
+       * or \ref MultidiagonalMatrix::forElements and \ref MultidiagonalMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -374,12 +379,12 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixViewExample_rowsReduction.out
+       * \include MultidiagonalMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on matrix rows.
@@ -401,12 +406,12 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixViewExample_rowsReduction.out
+       * \include MultidiagonalMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows for constant instances.
@@ -426,12 +431,12 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixViewExample_allRowsReduction.out
+       * \include MultidiagonalMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -451,12 +456,12 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include MultidiagonalMatrixViewExample_allRowsReduction.out
+       * \include MultidiagonalMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for iteration over all matrix rows for constant instances.
@@ -540,7 +545,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \include MultidiagonalMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
@@ -556,7 +561,107 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \include MultidiagonalMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref MultidiagonalMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::MultidiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include MultidiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
index 44c43da7f9640f4f23d5b47e2c37f4f36e60b42a..7dadde222a41ec5832fa1bffebad8b86ba75427f 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp
@@ -147,7 +147,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, reduce, keep, 0 );
+   this->reduceAllRows( fetch, reduce, keep, 0 );
 }
 
 template< typename Real,
@@ -219,7 +219,7 @@ setValue( const RealType& v )
    auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable {
       value = newValue;
    };
-   this->forEachElement( f );
+   this->forAllElements( f );
 }
 
 template< typename Real,
@@ -229,9 +229,9 @@ template< typename Real,
 __cuda_callable__
 auto
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-getRow( const IndexType& rowIdx ) const -> const RowView
+getRow( const IndexType& rowIdx ) const -> const ConstRowView
 {
-   return RowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer );
+   return ConstRowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -356,7 +356,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
@@ -385,7 +385,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
@@ -414,9 +414,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -426,9 +426,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -490,7 +490,7 @@ template< typename Real,
    template< typename Function >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
-   this->forElements( 0, this->indxer.getNonEmptyRowsCount(), function );
+   this->forElements( 0, this->indexer.getNonemptyRowsCount(), function );
 }
@@ -502,11 +502,69 @@ template< typename Real,
    template< typename Function >
 void
 MultidiagonalMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function )
+forAllElements( Function& function )
 {
    this->forElements( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   auto view = *this;
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto rowView = view.getRow( rowIdx );
+      function( rowView );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = *this;
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto rowView = view.getRow( rowIdx );
+      function( rowView );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+MultidiagonalMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -517,7 +575,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 sequentialForRows( IndexType begin, IndexType end, Function& function ) const
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
@@ -530,7 +588,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >::
 sequentialForRows( IndexType begin, IndexType end, Function& function )
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
@@ -593,9 +651,9 @@ vectorProduct( const InVector& inVector,
    if( end == 0 )
       end = this->getRows();
    if( outVectorMultiplicator == ( RealType ) 0.0 )
-      this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
+      this->reduceRows( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
    else
-      this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
+      this->reduceRows( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -634,11 +692,11 @@ addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_
          value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
       };
       if( thisMult == 0.0 )
-         this->forEachElement( add0 );
+         this->forAllElements( add0 );
       else if( thisMult == 1.0 )
-         this->forEachElement( add1 );
+         this->forAllElements( add1 );
       else
-         this->forEachElement( addGen );
+         this->forAllElements( addGen );
    }*/
 }
 
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 0e2b091a409f42d62dc23e9e04cd91b1c657cfc8..b9b7dceae153a0b6e3572c75c0cb4950566becff 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -61,14 +61,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       // Supporting types - they are not important for the user
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
-      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType;
+      using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesType;
       using ValuesViewType = typename ValuesVectorType::ViewType;
       using ConstValuesViewType = typename ValuesViewType::ConstViewType;
-      using ColumnsIndexesVectorType = Containers::Vector< Index, Device, Index, IndexAllocator >;
+      using ColumnsIndexesVectorType = Containers::Vector< typename TNL::copy_const< Index >::template from< Real >::type, Device, Index, IndexAllocator >;
       using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType;
       using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
-      using RowsCapacitiesType = Containers::Vector< Index, Device, Index, IndexAllocator >;
-      using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >;
+      using RowsCapacitiesType = Containers::Vector< std::remove_const_t< Index >, Device, Index, IndexAllocator >;
+      using RowsCapacitiesView = Containers::VectorView< std::remove_const_t< Index >, Device, Index >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
 
       /**
@@ -88,7 +88,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       /**
        * \brief The type of matrix elements.
        */
-      using RealType = Real;
+      using RealType = std::remove_const_t< Real >;
 
       using ComputeRealType = ComputeReal;
 
@@ -151,12 +151,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       /**
        * \brief Type for accessing matrix rows.
        */
-      using RowView = SparseMatrixRowView< typename SegmentsType::SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >;
+      using RowView = typename ViewType::RowView;
 
       /**
        * \brief Type for accessing constant matrix rows.
        */
-      using ConstRowView = typename RowView::ConstViewType;
+      using ConstRowView = typename ViewType::ConstRowView;
 
       /**
        * \brief Helper type for getting self type or its modifications.
@@ -529,7 +529,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -554,7 +554,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -583,7 +583,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -620,12 +620,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cpp
        * \par Output
-       * \include SparseMatrixExample_rowsReduction.out
+       * \include SparseMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on matrix rows for constant instances.
@@ -647,12 +647,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_reduceRows.cpp
        * \par Output
-       * \include SparseMatrixExample_rowsReduction.out
+       * \include SparseMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -672,12 +672,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include SparseMatrixExample_allRowsReduction.out
+       * \include SparseMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows for constant instances.
@@ -697,88 +697,198 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include SparseMatrixExample_allRowsReduction.out
+       * \include SparseMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
-       * \brief Method for iteration over all matrix rows for constant instances.
+       * \brief Method for parallel iteration over matrix elements of given rows for constant instances.
        *
        * \tparam Function is type of lambda function that will operate on matrix elements.
-       *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each element of given rows.
+       *
+       * The lambda function `function` should be declared like follows:
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute ) { ... };
+       * ```
+       *
        *  The \e localIdx parameter is a rank of the non-zero element in given row.
        *  If the 'compute' variable is set to false the iteration over the row can
        *  be interrupted.
        *
-       * \param begin defines beginning of the range [begin,end) of rows to be processed.
-       * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param function is an instance of the lambda function to be called in each row.
-       *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forElements.cpp
        * \par Output
-       * \include SparseMatrixExample_forRows.out
+       * \include SparseMatrixExample_forElements.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function ) const;
+      void forElements( IndexType begin, IndexType end, Function&& function ) const;
 
       /**
-       * \brief Method for iteration over all matrix rows for non-constant instances.
+       * \brief Method for parallel iteration over all matrix elements of given rows for non-constant instances.
        *
        * \tparam Function is type of lambda function that will operate on matrix elements.
-       *    It is should have form like
-       *  `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each element of given rows.
+       *
+       * The lambda function `function` should be declared like follows:
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute ) mutable { ... };
+       * ```
+       *
        *  The \e localIdx parameter is a rank of the non-zero element in given row.
        *  If the 'compute' variable is set to false the iteration over the row can
        *  be interrupted.
        *
-       * \param begin defines beginning of the range [begin,end) of rows to be processed.
-       * \param end defines ending of the range [begin,end) of rows to be processed.
-       * \param function is an instance of the lambda function to be called in each row.
-       *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forElements.cpp
        * \par Output
-       * \include SparseMatrixExample_forRows.out
+       * \include SparseMatrixExample_forElements.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function& function );
+      void forElements( IndexType begin, IndexType end, Function&& function );
 
       /**
-       * \brief This method calls \e forElements for all matrix rows (for constant instances).
+       * \brief Method for parallel iteration over all matrix elements for constant instances.
        *
        * See \ref SparseMatrix::forElements.
        *
        * \tparam Function is a type of lambda function that will operate on matrix elements.
-       * \param function  is an instance of the lambda function to be called in each row.
+       * \param function  is an instance of the lambda function to be called for each matrix element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forElements.cpp
        * \par Output
-       * \include SparseMatrixExample_forAllRows.out
+       * \include SparseMatrixExample_forElements.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function&& function ) const;
 
       /**
-       * \brief This method calls \e forElements for all matrix rows.
+       * \brief Method for parallel iteration over all matrix elements for non-constant instances.
        *
        * See \ref SparseMatrix::forElements.
        *
        * \tparam Function is a type of lambda function that will operate on matrix elements.
-       * \param function  is an instance of the lambda function to be called in each row.
+       * \param function  is an instance of the lambda function to be called for each matrix element.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forElements.cpp
+       * \par Output
+       * \include SparseMatrixExample_forElements.out
+       */
+      template< typename Function >
+      void forAllElements( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrix::RowView.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
        * \par Output
-       * \include SparseMatrixExample_forAllRows.out
+       * \include SparseMatrixExample_forRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 1c4524d3fc8d13e76f912dbf2a06d77118e9847d..6f701a3ea7d3680091f3012fc0921e9ca85e6854 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -498,10 +498,10 @@ template< typename Real,
           typename RealAllocator,
           typename IndexAllocator >
 __cuda_callable__
-Real
+auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
 getElement( const IndexType row,
-            const IndexType column ) const
+            const IndexType column ) const -> RealType
 {
    return this->view.getElement( row, column );
 }
@@ -539,9 +539,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
 {
-   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+   this->view.reduceRows( begin, end, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -555,9 +555,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
-   this->view.rowsReduction( begin, end, fetch, reduce, keep, zero );
+   this->view.reduceRows( begin, end, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -571,9 +571,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -587,9 +587,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -603,7 +603,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-forElements( IndexType begin, IndexType end, Function& function ) const
+forElements( IndexType begin, IndexType end, Function&& function ) const
 {
    this->view.forElements( begin, end, function );
 }
@@ -619,7 +619,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-forElements( IndexType begin, IndexType end, Function& function )
+forElements( IndexType begin, IndexType end, Function&& function )
 {
    this->view.forElements( begin, end, function );
 }
@@ -635,7 +635,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-forEachElement( Function& function ) const
+forAllElements( Function&& function ) const
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -651,11 +651,75 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-forEachElement( Function& function )
+forAllElements( Function&& function )
 {
    this->forElements( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename ComputeReal,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   this->getView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename ComputeReal,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   this->getConstView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename ComputeReal,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
+forAllRows( Function&& function )
+{
+   this->getView().forAllRows( function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename, typename > class Segments,
+          typename ComputeReal,
+          typename RealAllocator,
+          typename IndexAllocator >
+   template< typename Function >
+void
+SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
+forAllRows( Function&& function ) const
+{
+   this->getConstView().forAllRows( function );  // constant instance: delegate to the constant view, as forRows() const does
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -837,7 +901,7 @@ operator=( const DenseMatrix< Real_, Device_, Index_, Organization, RealAllocato
                values_view[ thisGlobalIdx ] = value;
          }
       };
-      matrix.forEachElement( f );
+      matrix.forAllElements( f );
    }
    else
    {
@@ -950,7 +1014,7 @@ operator=( const RHSMatrix& matrix )
             rowLocalIndexes_view[ rowIdx ] = localIdx;
          }
       };
-      matrix.forEachElement( f );
+      matrix.forAllElements( f );
    }
    else
    {
diff --git a/src/TNL/Matrices/SparseMatrixElement.h b/src/TNL/Matrices/SparseMatrixElement.h
new file mode 100644
index 0000000000000000000000000000000000000000..485fb919b95a0938dc9d4b3b67093be31008f0a6
--- /dev/null
+++ b/src/TNL/Matrices/SparseMatrixElement.h
@@ -0,0 +1,69 @@
+/***************************************************************************
+                          SparseMatrixElement.h -  description
+                             -------------------
+    begin                : Mar 21, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <ostream>
+
+#include <TNL/Cuda/CudaCallable.h>
+
+namespace TNL {
+namespace Matrices {
+
+
+template< typename Real,
+          typename Index,
+          bool isBinary_ = false >  // isBinary_ is not referenced anywhere inside this class
+class SparseMatrixElement  // Proxy bundling references to one matrix element: its value, row, column and local index.
+{
+   public:
+      /** \brief Type of the matrix element value. */
+      using RealType = Real;
+      /** \brief Type used for the row, column and local indexes. */
+      using IndexType = Index;
+      /** \brief Binds the proxy to the given objects; only references are kept, so they must outlive the proxy. */
+      __cuda_callable__
+      SparseMatrixElement( RealType& value,
+                           const IndexType& rowIdx,
+                           IndexType& columnIdx,
+                           const IndexType& localIdx )
+      : value_( value ), rowIdx( rowIdx ), columnIdx( columnIdx ), localIdx( localIdx ) {}
+      /** \brief Returns a modifiable reference to the bound value. */
+      __cuda_callable__
+      RealType& value() { return value_; }
+      /** \brief Returns a constant reference to the bound value. */
+      __cuda_callable__
+      const RealType& value() const { return value_; }
+      /** \brief Returns a constant reference to the bound row index. */
+      __cuda_callable__
+      const IndexType& rowIndex() const { return rowIdx; }
+      /** \brief Returns a modifiable reference to the bound column index. */
+      __cuda_callable__
+      IndexType& columnIndex() { return columnIdx; }
+      /** \brief Returns a constant reference to the bound column index. */
+      __cuda_callable__
+      const IndexType& columnIndex() const { return columnIdx; }
+      /** \brief Returns a constant reference to the bound local index. */
+      __cuda_callable__
+      const IndexType& localIndex() const { return localIdx; }
+
+   protected:
+      /** \brief Reference to the value passed to the constructor. */
+      RealType& value_;
+      /** \brief Reference to the row index passed to the constructor. */
+      const IndexType& rowIdx;
+      /** \brief Reference to the column index passed to the constructor. */
+      IndexType& columnIdx;
+      /** \brief Reference to the local index passed to the constructor. */
+      const IndexType& localIdx;
+};
+
+   } // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h
index 84da4e064c44c3198c7665cb2334d7a0ae1d0efa..4976a420e22fb4544a4fbb454e7a17574105098d 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.h
+++ b/src/TNL/Matrices/SparseMatrixRowView.h
@@ -13,6 +13,7 @@
 #include <ostream>
 
 #include <TNL/Cuda/CudaCallable.h>
+#include <TNL/Matrices/MatrixRowViewIterator.h>
 
 namespace TNL {
 namespace Matrices {
@@ -80,10 +81,25 @@ class SparseMatrixRowView
        */
       using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
 
+      /**
+       * \brief Type of sparse matrix row view.
+       */
+      using RowView = SparseMatrixRowView< SegmentView, ValuesViewType, ColumnsIndexesViewType, isBinary_ >;
+
       /**
        * \brief Type of constant sparse matrix row view.
        */
-      using ConstViewType = SparseMatrixRowView< SegmentView, ConstValuesViewType, ConstColumnsIndexesViewType, isBinary_ >;
+      using ConstView = SparseMatrixRowView< SegmentView, ConstValuesViewType, ConstColumnsIndexesViewType, isBinary_ >;
+
+      /**
+       * \brief The type of related matrix element.
+       */
+      using MatrixElementType = SparseMatrixElement< RealType, IndexType >;
+
+      /**
+       * \brief Type of iterator for the matrix row.
+       */
+      using IteratorType = MatrixRowViewIterator< RowView >;
 
       /**
        * \brief Tells whether the parent matrix is a binary matrix.
@@ -111,6 +127,14 @@ class SparseMatrixRowView
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Returns the matrix row index.
+       *
+       * \return matrix row index.
+       */
+      __cuda_callable__
+      const IndexType& getRowIndex() const;
+
       /**
        * \brief Returns constants reference to a column index of an element with given rank in the row.
        *
@@ -169,7 +193,7 @@ class SparseMatrixRowView
        */
       __cuda_callable__
       void setColumnIndex( const IndexType localIdx,
-                           const RealType& columnIndex );
+                           const IndexType& columnIndex );
 
       /**
        * \brief Sets both a value and a column index of matrix element with given rank in the matrix row.
@@ -198,6 +222,38 @@ class SparseMatrixRowView
       __cuda_callable__
       bool operator==( const SparseMatrixRowView< _SegmentView, _ValuesView, _ColumnsIndexesView, _isBinary >& other ) const;
 
+      /**
+       * \brief Returns iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      IteratorType begin();
+
+      /**
+       * \brief Returns iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      IteratorType end();
+
+      /**
+       * \brief Returns constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType cbegin() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType cend() const;
+
    protected:
 
       SegmentViewType segmentView;
diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp
index a0a9c2604b8ee61d4fa7334f4b550756ea7fffcf..82ae9b8706e4998fc88f2403afd109bb68f61f51 100644
--- a/src/TNL/Matrices/SparseMatrixRowView.hpp
+++ b/src/TNL/Matrices/SparseMatrixRowView.hpp
@@ -40,6 +40,18 @@ getSize() const -> IndexType
    return segmentView.getSize();
 }
 
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__
+auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+getRowIndex() const -> const IndexType&  // the row index is whatever the segment view reports as its segment index
+{
+   return segmentView.getSegmentIndex();  // NOTE(review): a reference is returned - confirm getSegmentIndex() does not return by value
+}
+
 template< typename SegmentView,
           typename ValuesView,
           typename ColumnsIndexesView,
@@ -106,6 +118,20 @@ setValue( const IndexType localIdx,
    }
 }
 
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ void
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+setColumnIndex( const IndexType localIdx,
+                const IndexType& columnIndex )  // overwrites only the column index; the stored value is not touched here
+{
+   TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." );  // only the upper bound is asserted
+   const IndexType globalIdx = segmentView.getGlobalIndex( localIdx );  // position of the element inside columnIndexes
+   this->columnIndexes[ globalIdx ] = columnIndex;
+}
+
 template< typename SegmentView,
           typename ValuesView,
           typename ColumnsIndexesView,
@@ -155,6 +181,50 @@ operator==( const SparseMatrixRowView< _SegmentView, _ValuesView, _ColumnsIndexe
    return true;
 }
 
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+begin() -> IteratorType
+{
+   return IteratorType( *this, 0 );  // iterator constructed from this row view and index 0
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+end() -> IteratorType
+{
+   return IteratorType( *this, this->getSize() );  // iterator constructed from this row view and index getSize()
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+cbegin() const -> const IteratorType  // same iterator type as begin(), returned as a const object
+{
+   return IteratorType( *this, 0 );  // NOTE(review): *this is const here - confirm the iterator constructor accepts a const row view
+}
+
+template< typename SegmentView,
+          typename ValuesView,
+          typename ColumnsIndexesView,
+          bool isBinary_ >
+__cuda_callable__ auto
+SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::
+cend() const -> const IteratorType  // same iterator type as end(), returned as a const object
+{
+   return IteratorType( *this, this->getSize() );  // NOTE(review): *this is const here - confirm the iterator constructor accepts a const row view
+}
+
 template< typename SegmentView,
           typename ValuesView,
           typename ColumnsIndexesView,
@@ -171,5 +241,6 @@ std::ostream& operator<<( std::ostream& str, const SparseMatrixRowView< SegmentV
    return str;
 }
 
+
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index a74dab43f655da2da19368b49e335927dce26db5..8651ad1c3993bb8bf4abbe8afc7d9d1fc1ee9948 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -15,6 +15,7 @@
 #include <TNL/Allocators/Default.h>
 #include <TNL/Algorithms/Segments/CSR.h>
 #include <TNL/Matrices/SparseMatrixRowView.h>
+#include <TNL/TypeTraits.h>
 
 namespace TNL {
 namespace Matrices {
@@ -75,7 +76,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       using BaseType = MatrixView< Real, Device, Index >;
       using ValuesViewType = typename BaseType::ValuesView;
       using ConstValuesViewType = typename ValuesViewType::ConstViewType;
-      using ColumnsIndexesViewType = Containers::VectorView< Index, Device, Index >;
+      using ColumnsIndexesViewType = Containers::VectorView< typename TNL::copy_const< Index >::template from< Real >::type, Device, Index >;
       using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType;
       using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >;
       using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType;
@@ -125,12 +126,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       /**
        * \brief Type of related matrix view.
        */
-      using ViewType = SparseMatrixView< std::remove_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >;
+      using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >;
 
       /**
        * \brief Matrix view type for constant instances.
        */
-      using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, std::add_const_t< Index >, MatrixType, SegmentsViewTemplate >;
+      using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >;
 
       /**
        * \brief Type for accessing matrix rows.
@@ -140,7 +141,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       /**
        * \brief Type for accessing constant matrix rows.
        */
-      using ConstRowView = typename RowView::ConstViewType;
+      using ConstRowView = SparseMatrixRowView< typename SegmentsViewType::SegmentViewType, ConstValuesViewType, ConstColumnsIndexesViewType, isBinary() >;
 
       /**
        * \brief Helper type for getting self type or its modifications.
@@ -320,7 +321,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -345,7 +346,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -373,7 +374,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
-       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forEachElement.
+       * or \ref SparseMatrix::forElements and \ref SparseMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -410,12 +411,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include SparseMatrixViewExample_rowsReduction.out
+       * \include SparseMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on matrix rows for constant instances.
@@ -437,12 +438,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include SparseMatrixViewExample_rowsReduction.out
+       * \include SparseMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -462,12 +463,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include SparseMatrixViewExample_allRowsReduction.out
+       * \include SparseMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows for constant instances.
@@ -487,12 +488,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include SparseMatrixViewExample_allRowsReduction.out
+       * \include SparseMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for constant instances.
@@ -552,7 +553,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \include SparseMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
@@ -568,7 +569,108 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * \include SparseMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrixView::forElements where more than one thread can be mapped to each row.
+
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( const ConstRowView& row ) { ... };
+       * ```
+       *
+       * \e ConstRowView represents constant matrix row - see \ref TNL::Matrices::SparseMatrixView::ConstRowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::SparseMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref SparseMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( const ConstRowView& row ) { ... };
+       * ```
+       *
+       * \e ConstRowView represents constant matrix row - see \ref TNL::Matrices::SparseMatrixView::ConstRowView.
+       *
+       * \par Example
+       * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include SparseMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index e7842a50a5065fb4ccf322fe92ba533497e326f5..593c100a98f93730d7175f3aa0ed2f287e1bfd94 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -134,7 +134,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   this->reduceAllRows( fetch, std::plus<>{}, keep, 0 );
 }
 
 template< typename Real,
@@ -157,7 +157,7 @@ getRowCapacities( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   this->reduceAllRows( fetch, std::plus<>{}, keep, 0 );
 }
 
 template< typename Real,
@@ -348,10 +348,10 @@ template< typename Real,
           template< typename, typename > class SegmentsView,
           typename ComputeReal >
 __cuda_callable__
-Real
+auto
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
 getElement( IndexType row,
-            IndexType column ) const
+            IndexType column ) const -> RealType
 {
    TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." );
    TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." );
@@ -504,7 +504,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero )
 {
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
@@ -532,7 +532,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const
 {
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
@@ -561,9 +561,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -575,9 +575,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -639,7 +639,7 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
    this->forElements( 0, this->getRows(), function );
 }
@@ -653,11 +653,81 @@ template< typename Real,
    template< typename Function >
 void
 SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-forEachElement( Function& function )
+forAllElements( Function& function )
 {
    this->forElements( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView,
+          typename ComputeReal >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
+forRows( IndexType begin, IndexType end, Function&& function )  // calls `function` with a RowView for each segment in [begin, end)
+{
+   using SegmentViewType = typename SegmentsViewType::SegmentViewType;
+   auto columnIndexesView = this->columnIndexes.getView();  // views (not the matrix itself) are what the lambda captures by value
+   auto valuesView = this->values.getView();
+   auto processSegment = [=] __cuda_callable__ ( SegmentViewType& segmentView ) mutable {
+      RowView currentRow( segmentView, valuesView, columnIndexesView );  // row view assembled from the segment and both data views
+      function( currentRow );
+   };
+   this->segments.forSegments( begin, end, processSegment );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView,
+          typename ComputeReal >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
+forRows( IndexType begin, IndexType end, Function&& function ) const  // calls `function` with a const ConstRowView for each segment in [begin, end)
+{
+   using SegmentViewType = typename SegmentsViewType::SegmentViewType;
+   const auto columnIndexesView = this->columnIndexes.getConstView();  // constant views are what the lambda captures by value
+   const auto valuesView = this->values.getConstView();
+   auto processSegment = [=] __cuda_callable__ ( const SegmentViewType& segmentView ) mutable {
+      const ConstRowView currentRow( segmentView, valuesView, columnIndexesView );  // the row view is const, so `function` must accept a const row
+      function( currentRow );
+   };
+   this->segments.forSegments( begin, end, processSegment );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView,
+          typename ComputeReal >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
+forAllRows( Function&& function )  // overload for mutable instances
+{
+   this->forRows( 0, this->getRows(), function );  // same as forRows over the full range [0, getRows())
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename MatrixType,
+          template< typename, typename > class SegmentsView,
+          typename ComputeReal >
+   template< typename Function >
+void
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
+forAllRows( Function&& function ) const  // overload for constant instances
+{
+   this->forRows( 0, this->getRows(), function );  // same as the const forRows over the full range [0, getRows())
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -670,7 +740,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
 sequentialForRows( IndexType begin, IndexType end, Function& function ) const
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
@@ -685,7 +755,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
 sequentialForRows( IndexType begin, IndexType end, Function& function )
 {
    for( IndexType row = begin; row < end; row ++ )
-      this->forElements( row, row + 1, function );
+      this->forRows( row, row + 1, function );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/TridiagonalMatrix.h b/src/TNL/Matrices/TridiagonalMatrix.h
index dc6b31cb52309cec7326771570a6ea172f8c8b1e..c970ff9b7f837c14bfe10e8d57d2dee54ac4b721 100644
--- a/src/TNL/Matrices/TridiagonalMatrix.h
+++ b/src/TNL/Matrices/TridiagonalMatrix.h
@@ -70,7 +70,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
       // Supporting types - they are not important for the user
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using IndexerType = details::TridiagonalMatrixIndexer< Index, Organization >;
-      using ValuesVectorType = typename BaseType::ValuesVectorType;
+      using ValuesVectorType = typename BaseType::ValuesType;
       using ValuesViewType = typename ValuesVectorType::ViewType;
 
       /**
@@ -117,8 +117,12 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
       /**
        * \brief Type for accessing matrix rows.
        */
-      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
+      using RowView = typename ViewType::RowView;
 
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
+      using ConstRowView = typename ViewType::ConstRowView;
 
       /**
        * \brief Helper type for getting self type or its modifications.
@@ -401,7 +405,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * See \ref TridiagonalMatrixRowView.
        */
       __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
+      const ConstRowView getRow( const IndexType& rowIdx ) const;
 
       /**
        * \brief Set all matrix elements to given value.
@@ -418,7 +422,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -442,7 +446,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -470,7 +474,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -505,12 +509,12 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cpp
        * \par Output
-       * \include TridiagonalMatrixExample_rowsReduction.out
+       * \include TridiagonalMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on matrix rows of constant matrix instances.
@@ -532,12 +536,12 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceRows.cpp
        * \par Output
-       * \include TridiagonalMatrixExample_rowsReduction.out
+       * \include TridiagonalMatrixExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -559,12 +563,12 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include TridiagonalMatrixExample_allRowsReduction.out
+       * \include TridiagonalMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows of constant matrix instances.
@@ -586,12 +590,12 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_reduceAllRows.cpp
        * \par Output
-       * \include TridiagonalMatrixExample_allRowsReduction.out
+       * \include TridiagonalMatrixExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for iteration over matrix rows for constant instances.
@@ -657,7 +661,7 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include TridiagonalMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
 
       /**
        * \brief Method for iteration over all matrix rows for non-constant instances.
@@ -679,7 +683,107 @@ class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \include TridiagonalMatrixExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref TridiagonalMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref TridiagonalMatrix::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref TridiagonalMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * In each row, given lambda function is performed. Each row is processed by at most one thread unlike the method
+       * \ref TridiagonalMatrix::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrix::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/TridiagonalMatrix.hpp b/src/TNL/Matrices/TridiagonalMatrix.hpp
index cbdba8299f732825eb1487f39974d9bb504122a5..87a508a9cdb4de7cdbced0e4b3ea371594a55325 100644
--- a/src/TNL/Matrices/TridiagonalMatrix.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrix.hpp
@@ -283,7 +283,7 @@ template< typename Real,
 __cuda_callable__
 auto
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-getRow( const IndexType& rowIdx ) const -> const RowView
+getRow( const IndexType& rowIdx ) const -> const ConstRowView
 {
    return this->view.getRow( rowIdx );
 }
@@ -348,9 +348,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.reduceRows( first, last, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -361,9 +361,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
+   this->view.reduceRows( first, last, fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -374,9 +374,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->view.reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -387,9 +387,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->view.reduceRows( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -426,7 +426,7 @@ template< typename Real,
    template< typename Function >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
    this->view.forElements( 0, this->getRows(), function );
 }
@@ -439,11 +439,63 @@ template< typename Real,
    template< typename Function >
 void
 TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
-forEachElement( Function& function )
+forAllElements( Function& function )
 {
    this->view.forElements( 0, this->getRows(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   this->getView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   this->getConstView().forRows( begin, end, function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+forAllRows( Function&& function )
+{
+   this->getView().forAllRows( function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization,
+          typename RealAllocator >
+   template< typename Function >
+void
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::
+forAllRows( Function&& function ) const
+{
+   this->getConstView().forAllRows( function );  // const overload delegates to the constant view, like forRows() const
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -668,7 +720,7 @@ operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealA
          auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
-         this->forEachElement( f );
+         this->forAllElements( f );
       }
       else
       {
@@ -678,7 +730,7 @@ operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealA
          auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable {
             value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
          };
-         this->forEachElement( f );
+         this->forAllElements( f );
       }
    }
    return *this;
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h
index 939e4893ff0257cf014e61541357b53701c081f3..dd60f907354f5da7bce5a7a5279c73e940fdf946 100644
--- a/src/TNL/Matrices/TridiagonalMatrixRowView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h
@@ -10,23 +10,26 @@
 
 #pragma once
 
+#include <TNL/Matrices/MatrixRowViewIterator.h>
+#include <TNL/Matrices/MultidiagonalMatrixElement.h>
+
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 /**
  * \brief RowView is a simple structure for accessing rows of tridiagonal matrix.
- * 
+ *
  * \tparam ValuesView is a vector view storing the matrix elements values.
  * \tparam Indexer is type of object responsible for indexing and organization of
  *    matrix elements.
- * 
+ *
  * See \ref TridiagonalMatrix and \ref TridiagonalMatrixView.
- * 
+ *
  * \par Example
  * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp
  * \par Output
  * \include TridiagonalatrixExample_getRow.out
- * 
+ *
  * \par Example
  * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp
  * \par Output
@@ -59,9 +62,39 @@ class TridiagonalMatrixRowView
        */
       using IndexerType = Indexer;
 
+      /**
+       * \brief Type of constant container view used for storing the matrix elements values.
+       */
+      using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+
+      /**
+       * \brief Type of constant indexer view.
+       */
+      using ConstIndexerViewType = typename Indexer::ConstType;
+
+      /**
+       * \brief Type of tridiagonal matrix row view.
+       */
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
+
+      /**
+       * \brief Type of constant tridiagonal matrix row view.
+       */
+      using ConstRowView = TridiagonalMatrixRowView< ConstValuesViewType, ConstIndexerViewType >;
+
+      /**
+       * \brief The type of related matrix element.
+       */
+      using MatrixElementType = MultidiagonalMatrixElement< RealType, IndexType >;
+
+      /**
+       * \brief Type of iterator for the matrix row.
+       */
+      using IteratorType = MatrixRowViewIterator< RowView >;
+
       /**
        * \brief Constructor with all necessary data.
-       * 
+       *
        * \param rowIdx is index of the matrix row this RowView refer to.
        * \param values is a vector view holding values of matrix elements.
        * \param indexer is object responsible for indexing and organization of matrix elements
@@ -73,17 +106,25 @@ class TridiagonalMatrixRowView
 
       /**
        * \brief Returns number of diagonals of the tridiagonal matrix which is three.
-       * 
+       *
        * \return number three.
        */
       __cuda_callable__
       IndexType getSize() const;
 
+      /**
+       * \brief Returns the matrix row index.
+       *
+       * \return matrix row index.
+       */
+      __cuda_callable__
+      const IndexType& getRowIndex() const;
+
       /**
        * \brief Computes column index of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return column index of matrix element on given subdiagonal.
        */
       __cuda_callable__
@@ -91,9 +132,9 @@ class TridiagonalMatrixRowView
 
       /**
        * \brief Returns value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return constant reference to matrix element value.
        */
       __cuda_callable__
@@ -101,9 +142,9 @@ class TridiagonalMatrixRowView
 
       /**
        * \brief Returns value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the subdiagonal.
-       * 
+       *
        * \return non-constant reference to matrix element value.
        */
       __cuda_callable__
@@ -111,13 +152,46 @@ class TridiagonalMatrixRowView
 
       /**
        * \brief Changes value of matrix element on given subdiagonal.
-       * 
+       *
        * \param localIdx is an index of the matrix subdiagonal.
        * \param value is the new value of the matrix element.
        */
       __cuda_callable__
       void setElement( const IndexType localIdx,
                        const RealType& value );
+
+      /**
+       * \brief Returns iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      IteratorType begin();
+
+      /**
+       * \brief Returns iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      IteratorType end();
+
+      /**
+       * \brief Returns constant iterator pointing at the beginning of the matrix row.
+       *
+       * \return iterator pointing at the beginning.
+       */
+      __cuda_callable__
+      const IteratorType cbegin() const;
+
+      /**
+       * \brief Returns constant iterator pointing at the end of the matrix row.
+       *
+       * \return iterator pointing at the end.
+       */
+      __cuda_callable__
+      const IteratorType cend() const;
+
    protected:
 
       IndexType rowIdx;
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
index 80fc1a26d52c32b60d1e184ee0beb87ef908c687..de4983d866b266c8cf88a42b6447d4049928e266 100644
--- a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -11,7 +11,7 @@
 #pragma once
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename ValuesView, typename Indexer >
 __cuda_callable__
@@ -32,6 +32,15 @@ getSize() const -> IndexType
    return indexer.getRowSize( rowIdx );
 }
 
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getRowIndex() const -> const IndexType&
+{
+   return rowIdx;
+}
+
 template< typename ValuesView, typename Indexer >
 __cuda_callable__
 auto
@@ -63,7 +72,7 @@ getValue( const IndexType localIdx ) -> RealType&
 
 template< typename ValuesView, typename Indexer >
 __cuda_callable__
-void 
+void
 TridiagonalMatrixRowView< ValuesView, Indexer >::
 setElement( const IndexType localIdx,
             const RealType& value )
@@ -71,5 +80,41 @@ setElement( const IndexType localIdx,
    this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
 }
 
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+begin() -> IteratorType
+{
+   return IteratorType( *this, 0 );
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+end() -> IteratorType
+{
+   return IteratorType( *this, this->getSize() );
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+cbegin() const -> const IteratorType
+{
+   return IteratorType( *this, 0 );
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+cend() const -> const IteratorType
+{
+   return IteratorType( *this, this->getSize() );
+}
+
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 324caea8639fa08921a3013a49704e2b0ccc8756..be2926934f46d794858763c31c045731a0b51e4c 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -26,7 +26,7 @@ namespace Matrices {
  * matrix to lambda functions. SparseMatrix view can be also created in CUDA kernels.
  *
  * See \ref TridiagonalMatrix for more details.
- * 
+ *
  * \tparam Real is a type of matrix elements.
  * \tparam Device is a device where the matrix is allocated.
  * \tparam Index is a type for indexing of the matrix elements.
@@ -76,6 +76,11 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        */
       using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
+      /**
+       * \brief Type for accessing constant matrix rows.
+       */
+      using ConstRowView = typename RowView::ConstRowView;
+
       /**
        * \brief Helper type for getting self type or its modifications.
        */
@@ -255,7 +260,7 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * See \ref TridiagonalMatrixRowView.
        */
       __cuda_callable__
-      const RowView getRow( const IndexType& rowIdx ) const;
+      const ConstRowView getRow( const IndexType& rowIdx ) const;
 
       /**
        * \brief Set all matrix elements to given value.
@@ -272,7 +277,7 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -297,7 +302,7 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        * The call may fail if the matrix row capacity is exhausted.
        *
        * \param row is row index of the element.
@@ -325,7 +330,7 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * can be called even from device kernels. If the matrix is allocated in GPU device
        * this method is called from CPU, it transfers values of each matrix element separately and so the
        * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow
-       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forEachElement.
+       * or \ref TridiagonalMatrix::forElements and \ref TridiagonalMatrix::forAllElements.
        *
        * \param row is a row index of the matrix element.
        * \param column i a column index of the matrix element.
@@ -361,12 +366,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include TridiagonalMatrixViewExample_rowsReduction.out
+       * \include TridiagonalMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on matrix rows.
@@ -388,12 +393,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceRows.cpp
        * \par Output
-       * \include TridiagonalMatrixViewExample_rowsReduction.out
+       * \include TridiagonalMatrixViewExample_reduceRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for performing general reduction on all matrix rows for constant instances.
@@ -413,12 +418,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include TridiagonalMatrixViewExample_allRowsReduction.out
+       * \include TridiagonalMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
       /**
        * \brief Method for performing general reduction on all matrix rows.
@@ -438,12 +443,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \param zero is zero of given reduction operation also known as idempotent element.
        *
        * \par Example
-       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_reduceAllRows.cpp
        * \par Output
-       * \include TridiagonalMatrixViewExample_allRowsReduction.out
+       * \include TridiagonalMatrixViewExample_reduceAllRows.out
        */
       template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
-      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
+      void reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero );
 
       /**
        * \brief Method for iteration over all matrix rows for constant instances.
@@ -503,7 +508,7 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \include TridiagonalMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function ) const;
+      void forAllElements( Function& function ) const;
 
       /**
        * \brief This method calls \e forElements for all matrix rows.
@@ -519,7 +524,107 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
        * \include TridiagonalMatrixViewExample_forAllRows.out
        */
       template< typename Function >
-      void forEachElement( Function& function );
+      void forAllElements( Function& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end).
+       *
+       * The given lambda function is called for each row. Each row is processed by at most one thread, unlike the method
+       * \ref TridiagonalMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over matrix rows from interval [ \e begin, \e end) for constant instances.
+       *
+       * The given lambda function is called for each row. Each row is processed by at most one thread, unlike the method
+       * \ref TridiagonalMatrixView::forElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param begin defines beginning of the range [ \e begin,\e end ) of rows to be processed.
+       * \param end defines ending of the range [ \e begin, \e end ) of rows to be processed.
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forRows( IndexType begin, IndexType end, Function&& function ) const;
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows.
+       *
+       * The given lambda function is called for each row. Each row is processed by at most one thread, unlike the method
+       * \ref TridiagonalMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) mutable { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function );
+
+      /**
+       * \brief Method for parallel iteration over all matrix rows for constant instances.
+       *
+       * The given lambda function is called for each row. Each row is processed by at most one thread, unlike the method
+       * \ref TridiagonalMatrixView::forAllElements where more than one thread can be mapped to each row.
+       *
+       * \tparam Function is type of the lambda function.
+       *
+       * \param function is an instance of the lambda function to be called for each row.
+       *
+       * ```
+       * auto function = [] __cuda_callable__ ( RowView& row ) { ... };
+       * ```
+       *
+       * \e RowView represents matrix row - see \ref TNL::Matrices::TridiagonalMatrixView::RowView.
+       *
+       * \par Example
+       * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp
+       * \par Output
+       * \include TridiagonalMatrixViewExample_forRows.out
+       */
+      template< typename Function >
+      void forAllRows( Function&& function ) const;
 
       /**
        * \brief Method for sequential iteration over all matrix rows for constant instances.
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index c125ffe222d690e5153d51e82e995bdf48372ea8..3aa633776c7bbacc11318b9f3871b87027bbcca2 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -118,7 +118,7 @@ getCompressedRowLengths( Vector& rowLengths ) const
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   this->allRowsReduction( fetch, reduce, keep, 0 );
+   this->reduceAllRows( fetch, reduce, keep, 0 );
 }
 
 template< typename Real,
@@ -183,9 +183,9 @@ template< typename Real,
 __cuda_callable__
 auto
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-getRow( const IndexType& rowIdx ) const -> const RowView
+getRow( const IndexType& rowIdx ) const -> const ConstRowView
 {
-   return RowView( rowIdx, this->values.getView(), this->indexer );
+   return ConstRowView( rowIdx, this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -279,7 +279,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    const auto values_view = this->values.getConstView();
@@ -323,7 +323,7 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
+reduceRows( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ )
 {
    using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
    auto values_view = this->values.getConstView();
@@ -367,9 +367,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -379,9 +379,9 @@ template< typename Real,
    template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
+reduceAllRows( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero )
 {
-   this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
+   this->reduceRows( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -461,7 +461,7 @@ template< typename Real,
    template< typename Function >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function ) const
+forAllElements( Function& function ) const
 {
    this->forElements( 0, this->indxer.getNonEmptyRowsCount(), function );
 }
@@ -473,11 +473,69 @@ template< typename Real,
    template< typename Function >
 void
 TridiagonalMatrixView< Real, Device, Index, Organization >::
-forEachElement( Function& function )
+forAllElements( Function& function )
 {
    this->forElements( 0, this->indexer.getNonemptyRowsCount(), function );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+forRows( IndexType begin, IndexType end, Function&& function )
+{
+   auto view = *this;
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto rowView = view.getRow( rowIdx );
+      function( rowView );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+forRows( IndexType begin, IndexType end, Function&& function ) const
+{
+   auto view = *this;
+   auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable {
+      auto rowView = view.getRow( rowIdx );
+      function( rowView );
+   };
+   TNL::Algorithms::ParallelFor< DeviceType >::exec( begin, end, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          ElementsOrganization Organization >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, Organization >::
+forAllRows( Function&& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -563,9 +621,9 @@ vectorProduct( const InVector& inVector,
    if( end == 0 )
       end = this->getRows();
    if( matrixMultiplicator == 1.0 && outVectorMultiplicator == 0.0 )
-      this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
+      this->reduceRows( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 );
    else
-      this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
+      this->reduceRows( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -617,11 +675,11 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >&
          value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
       };
       if( thisMult == 0.0 )
-         this->forEachElement( add0 );
+         this->forAllElements( add0 );
       else if( thisMult == 1.0 )
-         this->forEachElement( add1 );
+         this->forAllElements( add1 );
       else
-         this->forEachElement( addGen );
+         this->forAllElements( addGen );
    }
 }
 
diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
index cb657e8dfe52c9c047e32509a9b16fc178351be3..0ba1f148802e1f9223ac9a3d96a5a3278597f87e 100644
--- a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h
@@ -34,7 +34,7 @@ class MultidiagonalMatrixIndexer
                                   const IndexType& columns,
                                   const IndexType& diagonals,
                                   const IndexType& nonemptyRows )
-      : rows( rows ), 
+      : rows( rows ),
         columns( columns ),
         diagonals( diagonals ),
         nonemptyRows( nonemptyRows ) {};
@@ -79,7 +79,7 @@ class MultidiagonalMatrixIndexer
          TNL_ASSERT_LT( localIdx, diagonals, "" );
          TNL_ASSERT_GE( rowIdx, 0, "" );
          TNL_ASSERT_LT( rowIdx, this->rows, "" );
-         
+
          if( RowMajorOrder )
             return diagonals * rowIdx + localIdx;
          else
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
index 6707193141483672510a2ac8f63e2b8862b702f0..002bb8a736d3e59483ddaa67862d2e256a9619b9 100644
--- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -21,6 +21,7 @@ class TridiagonalMatrixIndexer
    public:
 
       using IndexType = Index;
+      using ConstType = TridiagonalMatrixIndexer< std::add_const_t< Index >, RowMajorOrder >;
 
       static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
 
@@ -46,15 +47,6 @@ class TridiagonalMatrixIndexer
       __cuda_callable__
       IndexType getRowSize( const IndexType rowIdx ) const
       {
-         /*if( rowIdx == 0 )
-            return 2;
-         if( columns <= rows )
-         {
-            if( rowIdx == columns - 1 )
-               return 2;
-            if( rowIdx == columns )
-               return 1;
-         }*/
          return 3;
       };
 
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
index 0a3b8d43a601bd1cde378cbdac04d5b8d6552c88..845637e48f18e3f613b323991810ec54d5dc78ec 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
@@ -35,7 +35,7 @@ update( const MatrixPointer& matrixPointer )
 
    const auto kernel_matrix = matrixPointer->getView();
 
-   // TODO: Rewrite this with SparseMatrix::forEachElement
+   // TODO: Rewrite this with SparseMatrix::forAllElements
    auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
    {
       diag_view[ i ] = kernel_matrix.getElement( i, i );
diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h
index 63b8fc27391ebf9682c4a6a0e7a022a81caa2599..c5d0fea363e60a9fd096fcf96058e3df3b2c5942 100644
--- a/src/TNL/TypeTraits.h
+++ b/src/TNL/TypeTraits.h
@@ -270,4 +270,19 @@ public:
     static constexpr bool value = ( sizeof( test< std::decay_t<T> >(0) ) == sizeof( YesType ) );
 };
 
+/**
+ * \brief Copy const qualifier from Source type to Target type.
+ */
+template< typename Target >
+struct copy_const
+{
+   template< typename Source >
+   struct from
+   {
+      using type = typename std::conditional<
+       std::is_const< Source >::value,
+       std::add_const_t< Target >, Target >::type;
+   };
+};
+
 } //namespace TNL
diff --git a/src/UnitTests/Algorithms/Segments/SegmentsTest.hpp b/src/UnitTests/Algorithms/Segments/SegmentsTest.hpp
index de634cf01ff2f7ca70f9eb20b21a7beb1f39d33c..7073bdb8ab4dc2aa5aefa6013b6867d5e2a6746b 100644
--- a/src/UnitTests/Algorithms/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Algorithms/Segments/SegmentsTest.hpp
@@ -132,7 +132,7 @@ void test_AllReduction_MaximumInSegments()
       view[ globalIdx ] =  segmentIdx * 5 + localIdx + 1;
       return true;
    };
-   segments.forEachElement( init );
+   segments.forAllElements( init );
 
    TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount );
 
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index 148d92aa659b1754cce8b45367df3d9a361d2bc6..1ed8052eef47445e42fa34135c5a83cc431754ea 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -445,7 +445,7 @@ void testArrayForEachElement()
    using ValueType = typename ArrayType::ValueType;
 
    ArrayType a( 10 );
-   a.forEachElement( [] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = i; } );
+   a.forAllElements( [] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = i; } );
 
    for( int i = 0; i < 10; i++ )
       EXPECT_EQ( a.getElement( i ), i );
diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h
index 97ddc3da8680b20069f76abb6f1e9867a74ca8d6..d620b8bbb1ced1446f05e99fd1992c9609eb777b 100644
--- a/src/UnitTests/Containers/ArrayViewTest.h
+++ b/src/UnitTests/Containers/ArrayViewTest.h
@@ -48,7 +48,6 @@ std::ostream& operator<<( std::ostream& str, const MyData& v )
    return str << v.data;
 }
 
-
 // test fixture for typed tests
 template< typename View >
 class ArrayViewTest : public ::testing::Test
@@ -274,7 +273,7 @@ void ArrayViewEvaluateTest( ArrayType& u )
    using ViewType = ArrayView< ValueType, DeviceType, IndexType >;
    ViewType v( u );
 
-   v.forEachElement( [] __cuda_callable__ ( IndexType i, ValueType& value ) { value = 3 * i % 4; } );
+   v.forAllElements( [] __cuda_callable__ ( IndexType i, ValueType& value ) { value = 3 * i % 4; } );
    
    for( int i = 0; i < 10; i++ )
    {
diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h
index fa99547ec325eb9a3a9b6f10c38d5ad252d6bf66..2c5deb59da9e4a9d112ee1fc021a16d52c20ff01 100644
--- a/src/UnitTests/Containers/VectorTest.h
+++ b/src/UnitTests/Containers/VectorTest.h
@@ -89,7 +89,7 @@ void testVectorReduceElements()
    using ValueType = typename VectorType::ValueType;
 
    VectorType a( 10 );
-   a.forEachElement( [=] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = 1; } );
+   a.forAllElements( [=] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = 1; } );
    auto fetch = [] __cuda_callable__ ( IndexType i, ValueType& v ) -> ValueType { return v; };
    auto reduce = [] __cuda_callable__ ( const ValueType v1, const ValueType v2 ) { return v1 + v2; };
    EXPECT_EQ( a.reduceEachElement( fetch, reduce, ( ValueType ) 0.0 ),
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
index 36ea3bc8181ac8a29739e7537e623aebc9ab8df9..ca839a02f5eb859350248c3be0de6dcc00177c8d 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp
@@ -878,7 +878,7 @@ void test_VectorProduct()
 }
 
 template< typename Matrix >
-void test_RowsReduction()
+void test_reduceRows()
 {
    using RealType = typename Matrix::RealType;
    using ComputeRealType = typename Matrix::ComputeRealType;
@@ -955,7 +955,7 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   m.reduceAllRows( fetch, std::plus<>{}, keep, 0 );
    EXPECT_EQ( rowsCapacities, rowLengths );
    m.getCompressedRowLengths( rowLengths );
    EXPECT_EQ( rowsCapacities, rowLengths );
@@ -970,7 +970,7 @@ void test_RowsReduction()
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowSums_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( max_fetch, std::plus<>{}, max_keep, 0 );
+   m.reduceAllRows( max_fetch, std::plus<>{}, max_keep, 0 );
    const auto maxNorm = TNL::max( rowSums );
    EXPECT_EQ( maxNorm, 8 ) ; // 29+30+31+32+33+34+35+36
 }
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
index 5a4e98915cbec11a8107194b5b3ed33ab26e4e8c..8eaf6660b7082aefcaed97790e87c21937a2d524 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -89,11 +89,11 @@ TYPED_TEST( BinaryMatrixTest_CSR, vectorProductTest )
     test_VectorProduct< CSRMatrixType >();
 }
 
-TYPED_TEST( BinaryMatrixTest_CSR, rowsReduction )
+TYPED_TEST( BinaryMatrixTest_CSR, reduceRows )
 {
     using CSRMatrixType = typename TestFixture::CSRMatrixType;
 
-    test_RowsReduction< CSRMatrixType >();
+    test_reduceRows< CSRMatrixType >();
 }
 
 TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest )
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
index b903edeaa4dd0f074f867e229ff6045bc2bb8081..69fc4c73796c9b4c52395fa3acff2f758e489ac0 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h
@@ -100,11 +100,11 @@ TYPED_TEST( BinaryMatrixTest_Ellpack, vectorProductTest )
     test_VectorProduct< EllpackMatrixType >();
 }
 
-TYPED_TEST( BinaryMatrixTest_Ellpack, rowsReduction )
+TYPED_TEST( BinaryMatrixTest_Ellpack, reduceRows )
 {
     using EllpackMatrixType = typename TestFixture::EllpackMatrixType;
 
-    test_RowsReduction< EllpackMatrixType >();
+    test_reduceRows< EllpackMatrixType >();
 }
 
 TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest )
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
index 673b3b49bb56733657711e90acccf2df3dc3038f..b547ee866782c2ffccd0a5f969805f4b8970e0e8 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h
@@ -100,11 +100,11 @@ TYPED_TEST( BinaryMatrixTest_SlicedEllpack, vectorProductTest )
     test_VectorProduct< SlicedEllpackMatrixType >();
 }
 
-TYPED_TEST( BinaryMatrixTest_SlicedEllpack, rowsReduction )
+TYPED_TEST( BinaryMatrixTest_SlicedEllpack, reduceRows )
 {
     using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType;
 
-    test_RowsReduction< SlicedEllpackMatrixType >();
+    test_reduceRows< SlicedEllpackMatrixType >();
 }
 
 TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest )
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index a65411fc065b299fdc86d2e2550f07456257c75e..ef639fc440f71af822eb5822209fe802bb590d1b 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -5,7 +5,6 @@ set( COMMON_TESTS
             DenseMatrixCopyTest
             TridiagonalMatrixTest
             MultidiagonalMatrixTest
-
             SparseMatrixTest_CSRScalar
             SparseMatrixTest_CSRVector
             SparseMatrixTest_CSRHybrid
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 9cd7c3db05b7cdcda7fb2628102db3e6b2cd6f4c..c6dfa3842d88c3895156b61e5a71c162d05c8859 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -36,8 +36,6 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl";
 void test_GetSerializationType()
 {
    using namespace TNL::Algorithms::Segments;
-   std::cerr << TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() << std::endl;
-   std::cerr << TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() << std::endl;
    EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) );
    EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int,   TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, RowMajorOrder >" ) );
    EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) );
@@ -608,7 +606,7 @@ void test_SetRow()
          { 2, 3, 4, 5, 6 } };
       auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
-        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+        row.setValue( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
@@ -725,7 +723,7 @@ void test_AddRow()
       auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
       {
-         RealType& val = row.getElement( i );
+         RealType& val = row.getValue( i );
          val = rowIdx * val + values[ rowIdx ][ i ];
       }
    };
@@ -769,6 +767,186 @@ void test_AddRow()
     EXPECT_EQ( m.getElement( 5, 4 ), 150 );
 }
 
+template< typename Matrix >
+void test_ForElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x3 dense matrix:
+    *
+    *    /  1  1  1  \
+    *    |  2  2  2  |
+    *    |  3  3  3  |
+    *    |  4  4  4  |
+    *    |  5  5  5  |
+    *    |  6  6  6  |
+    *    |  7  7  7  |
+    *    \  8  8  8  /
+    */
+
+   const IndexType cols = 3;
+   const IndexType rows = 8;
+
+   Matrix m( rows, cols  );
+   m.forAllElements( [] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, const IndexType& columnIdx, RealType& value, bool compute ) mutable {
+      value = rowIdx + 1.0;
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+         EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx + 1.0 );
+}
+
+template< typename Matrix >
+void test_ForRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /////
+   // Set up a lower triangular matrix
+   const IndexType cols = 8;
+   const IndexType rows = 8;
+
+   /////
+   // Test without iterator
+   Matrix m( rows, cols  );
+   using RowView = typename Matrix::RowView;
+   m.forAllRows( [] __cuda_callable__ ( RowView& row ) mutable {
+      for( IndexType localIdx = 0; localIdx <= row.getRowIndex(); localIdx++ )
+         row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 );
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+      {
+         if( colIdx <= rowIdx )
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx - colIdx + 1.0 );
+         else
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), 0.0 );
+      }
+
+   /////
+   // Test with iterator
+   m.getValues() = 0.0;
+   m.forAllRows( [] __cuda_callable__ ( RowView& row ) mutable {
+      for( auto element : row )
+         if( element.columnIndex() <= element.rowIndex() )
+            element.value() = element.rowIndex() - element.columnIndex() + 1.0;
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+      {
+         if( colIdx <= rowIdx )
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx - colIdx + 1.0 );
+         else
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), 0.0 );
+      }
+
+
+}
+
+template< typename Matrix >
+void test_reduceRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 8x8 dense matrix:
+    *
+    *    /  1  2  3  0  4  5  0  1 \   6
+    *    |  0  6  0  7  0  0  0  1 |   3
+    *    |  0  8  9  0 10  0  0  1 |   4
+    *    |  0 11 12 13 14  0  0  1 |   5
+    *    |  0 15  0  0  0  0  0  1 |   2
+    *    |  0 16 17 18 19 20 21  1 |   7
+    *    | 22 23 24 25 26 27 28  1 |   8
+    *    \ 29 30 31 32 33 34 35 36 /   8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m( {
+        {  1,  2,  3,  0,  4,  5,  0,  1 },
+        {  0,  6,  0,  7,  0,  0,  0,  1 },
+        {  0,  8,  9,  0, 10,  0,  0,  1 },
+        {  0, 11, 12, 13, 14,  0,  0,  1 },
+        {  0, 15,  0,  0,  0,  0,  0,  1 },
+        {  0, 16, 17, 18, 19, 20, 21,  1 },
+        { 22, 23, 24, 25, 26, 27, 28,  1 },
+        { 29, 30, 31, 32, 33, 34, 35, 36 } } );
+    typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );       // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );       // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );       // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );       // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+       m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
+      m.setElement( i, 7, 1);
+
+   ////
+   // Compute number of non-zero elements in rows.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.reduceAllRows( fetch, std::plus<>{}, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute max norm
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return TNL::abs( value );
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.reduceAllRows( max_fetch, std::plus<>{}, max_keep, 0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
+}
+
 template< typename Matrix >
 void test_VectorProduct()
 {
@@ -1423,6 +1601,20 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, forElementsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+    // Instantiate the templated test_ForElements body for this fixture's matrix type.
+    test_ForElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, forRowsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+    // Instantiate the templated test_ForRows body for this fixture's matrix type.
+    test_ForRows< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.h b/src/UnitTests/Matrices/LambdaMatrixTest.h
index cc2893d9bc0cc33d6385520a2b7d34e25c0b9907..0cdfb37f3085d39e9e764b9daa9e38dcd5f45f89 100644
--- a/src/UnitTests/Matrices/LambdaMatrixTest.h
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.h
@@ -89,6 +89,14 @@ TYPED_TEST( LambdaMatrixTest, getElementTest )
    test_GetElement< LambdaMatrixParametersType >();
 }
 
+TYPED_TEST( LambdaMatrixTest, forRowsTest )
+{
+    using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
+    // Instantiate the templated test_ForRows body with this fixture's lambda-matrix parameter type.
+    test_ForRows< LambdaMatrixParametersType >();
+}
+
+
 TYPED_TEST( LambdaMatrixTest, vectorProductTest )
 {
     using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
@@ -96,11 +104,11 @@ TYPED_TEST( LambdaMatrixTest, vectorProductTest )
     test_VectorProduct< LambdaMatrixParametersType >();
 }
 
-TYPED_TEST( LambdaMatrixTest, rowsReduction )
+TYPED_TEST( LambdaMatrixTest, reduceRows )
 {
     using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType;
 
-    test_RowsReduction< LambdaMatrixParametersType >();
+    test_reduceRows< LambdaMatrixParametersType >();
 }
 #endif
 
diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.hpp b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
index 256a84bf7fba0069a9130c92afed76e6249fe547..ae435059ddd5b264ab1f79314c3b16f7b693be83 100644
--- a/src/UnitTests/Matrices/LambdaMatrixTest.hpp
+++ b/src/UnitTests/Matrices/LambdaMatrixTest.hpp
@@ -13,6 +13,7 @@
 
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
+#include <TNL/Matrices/DenseMatrix.h>
 
 template< typename Matrix >
 void test_Constructors()
@@ -161,6 +162,79 @@ void test_GetElement()
    EXPECT_EQ( m.getElement( 4, 4 ),  1.0 );
 }
 
+template< typename Matrix >
+void test_ForRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /**
+    * Checks forAllRows on a 5x5 lambda matrix by copying each row into a dense
+    * matrix and comparing element by element. The lambdas below define:
+    * /  1   0   0   0   0 \.
+    * |  1  -2   1   0   0 |
+    * |  0   1  -2   1   0 |
+    * |  0   0   1  -2   1 |
+    * \  0   0   0   0   1 /.
+    */
+
+   IndexType size = 5;
+   auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType {
+      if( rowIdx == 0 || rowIdx == size - 1 )
+         return 1;   // first and last row: a single entry
+      return 3;      // interior rows: three entries
+   };
+
+   auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) {
+      if( rowIdx == 0 || rowIdx == size -1 )
+      {
+         columnIdx = rowIdx;
+         value =  1.0;
+      }
+      else
+      {
+         columnIdx = rowIdx + localIdx - 1;   // localIdx 0, 1, 2 -> columns rowIdx - 1, rowIdx, rowIdx + 1
+         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0;
+      }
+   };
+
+   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) );
+
+   MatrixType m( size, size, matrixElements, rowLengths );
+
+   ////
+   // Test without iterator: copy every row into denseMatrix, addressing entries by local index
+   TNL::Matrices::DenseMatrix< RealType, DeviceType, IndexType > denseMatrix( size, size );
+   denseMatrix.setValue( 0.0 );
+   auto dense_view = denseMatrix.getView();
+   auto f = [=] __cuda_callable__ ( const typename MatrixType::RowView& row ) mutable {
+      auto dense_row = dense_view.getRow( row.getRowIndex() );
+      for( IndexType localIdx = 0; localIdx < row.getSize(); localIdx++ )
+         dense_row.setValue( row.getColumnIndex( localIdx ), row.getValue( localIdx ) );
+   };
+   m.forAllRows( f );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+         EXPECT_EQ( m.getElement( row, column ), denseMatrix.getElement( row, column ) );
+
+   ////
+   // Test with iterator: zero the dense copy and refill it with a range-based for over each row
+   denseMatrix.getValues() = 0.0;
+   auto f_iter = [=] __cuda_callable__ ( const typename MatrixType::RowView& row ) mutable {
+      auto dense_row = dense_view.getRow( row.getRowIndex() );
+      for( const auto element : row )
+         dense_row.setValue( element.columnIndex(), element.value() );
+   };
+   m.forAllRows( f_iter );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+         EXPECT_EQ( m.getElement( row, column ), denseMatrix.getElement( row, column ) );
+
+}
+
 template< typename Matrix >
 void test_VectorProduct()
 {
@@ -201,7 +275,7 @@ void test_VectorProduct()
 }
 
 template< typename Matrix >
-void test_RowsReduction()
+void test_reduceRows()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -242,7 +316,7 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       vView[ row ] = value;
    };
-   A.allRowsReduction( fetch, reduce, keep, 0.0 );
+   A.reduceAllRows( fetch, reduce, keep, 0.0 );
 
    EXPECT_EQ( v.getElement( 0 ),  1.0 );
    EXPECT_EQ( v.getElement( 1 ),  0.0 );
diff --git a/src/UnitTests/Matrices/Legacy/Legacy_SparseMatrixTest.hpp b/src/UnitTests/Matrices/Legacy/Legacy_SparseMatrixTest.hpp
index ada0a79ec64799c745a64b98475eb818977c43df..68b40479996c53008f5489cce6f94a95e694959f 100644
--- a/src/UnitTests/Matrices/Legacy/Legacy_SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/Legacy/Legacy_SparseMatrixTest.hpp
@@ -1466,7 +1466,7 @@ void test_VectorProductCSRAdaptive()
 }
 
 template< typename Matrix >
-void test_RowsReduction()
+void test_reduceRows()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -1547,7 +1547,7 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( fetch, reduce, keep, 0 );
+   m.reduceAllRows( fetch, reduce, keep, 0 );
    EXPECT_EQ( rowsCapacities, rowLengths );
    m.getCompressedRowLengths( rowLengths );
    EXPECT_EQ( rowsCapacities, rowLengths );
@@ -1565,7 +1565,7 @@ void test_RowsReduction()
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowSums_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   m.reduceAllRows( max_fetch, max_reduce, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
    EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
 }
diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
index cd753822425388fe17e151bcae29379423952f2f..0f2a4a63296711fbc102fecea420b3728b1a82a1 100644
--- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h
@@ -778,6 +778,84 @@ void test_AddRow()
    EXPECT_EQ( m.getElement( 5, 4 ),   0 );
 }
 
+template< typename Matrix >
+void test_ForRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /**
+    * Checks forAllRows on a 5x5 matrix built with diagonal offsets { -1, 0, 1 }.
+    * Both passes below (with and without iterator) are expected to produce:
+    *
+    * /  1  -2   0   0   0 \
+    * | -2   1  -2   0   0 |
+    * |  0  -2   1  -2   0 |
+    * |  0   0  -2   1  -2 |
+    * \  0   0   0  -2   1 /
+    */
+
+   const IndexType size( 5 );
+   Matrix m( size, size, { -1, 0, 1 } );
+
+   /////
+   // Test without iterator: set the row entries through their local indices 0, 1, 2
+   auto f = [=] __cuda_callable__ ( typename Matrix::RowView& row ) mutable {
+      const IndexType rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )   // local index 0 is not written in the first row
+         row.setElement( 0, -2.0 );
+      row.setElement( 1, 1.0 );
+      if( rowIdx < size -1 )   // local index 2 is not written in the last row
+         row.setElement( 2, -2.0 );
+   };
+   m.forAllRows( f );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+      {
+         const IndexType diff = row - column;   // 0: diagonal, 1: column left of it, -1: column right of it
+         if( diff == 0 )
+            EXPECT_EQ( m.getElement( row, column ), 1.0 );
+         else if( diff == 1 && row > 0 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else if( diff == -1 && row < size - 1 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else
+            EXPECT_EQ( m.getElement( row, column ), 0.0 );
+      }
+
+   /////
+   // Test with iterator: zero the stored values, then assign through the row's element iterator
+   m.getValues() = 0.0;
+   auto f_iter = [=] __cuda_callable__ ( typename Matrix::RowView& row ) mutable {
+      for( auto element : row )
+      {
+         if( element.rowIndex() > 0 && element.localIndex() == 0 )
+            element.value() = -2.0;
+         if( element.localIndex() == 1 )
+            element.value() = 1.0;
+         if( element.rowIndex() < size - 1 && element.localIndex() == 2 )
+            element.value() = -2.0;
+      }
+   };
+   m.forAllRows( f_iter );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+      {
+         const IndexType diff = row - column;   // same expectations as in the first pass
+         if( diff == 0 )
+            EXPECT_EQ( m.getElement( row, column ), 1.0 );
+         else if( diff == 1 && row > 0 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else if( diff == -1 && row < size - 1 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else
+            EXPECT_EQ( m.getElement( row, column ), 0.0 );
+      }
+
+}
+
 template< typename Matrix >
 void test_VectorProduct()
 {
@@ -1449,6 +1527,13 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, forRowsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+    // Instantiate the templated test_ForRows body for this fixture's matrix type.
+    test_ForRows< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 928e8336ce6002975d82b2f5f1bdb874af994822..1ae0fda8a0115d1e49e24c32dffdfd5c9c222a5a 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -95,11 +95,25 @@ TYPED_TEST( MatrixTest, vectorProductTest )
     test_VectorProduct< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, rowsReduction )
+TYPED_TEST( MatrixTest, forElements )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_RowsReduction< MatrixType >();
+    test_ForElements< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, forRows )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+    // Instantiate the templated test_ForRows body for this fixture's matrix type.
+    test_ForRows< MatrixType >();
+}
+
+TYPED_TEST( MatrixTest, reduceRows )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_reduceRows< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, saveAndLoadTest )
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 5c61606b572bafe3d828a515a2bcec9102f9c430..f906adfbf1285f72f16a51e58de4bcd4b4222fb8 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -1319,7 +1319,7 @@ void test_VectorProduct()
       // Test with large diagonal matrix
       Matrix m1( size, size );
       TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowCapacities( size );
-      rowCapacities.forEachElement( [] __cuda_callable__ ( IndexType i, IndexType& value ) { value = 1; } );
+      rowCapacities.forAllElements( [] __cuda_callable__ ( IndexType i, IndexType& value ) { value = 1; } );
       m1.setRowCapacities( rowCapacities );
       auto f1 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) {
          if( localIdx == 0  )
@@ -1328,7 +1328,7 @@ void test_VectorProduct()
             column = row;
          }
       };
-      m1.forEachElement( f1 );
+      m1.forAllElements( f1 );
       // check that the matrix was initialized
       m1.getCompressedRowLengths( rowCapacities );
       EXPECT_EQ( rowCapacities, 1 );
@@ -1343,7 +1343,7 @@ void test_VectorProduct()
       const int rows( size ), columns( size );
       Matrix m2( rows, columns );
       rowCapacities.setSize( rows );
-      rowCapacities.forEachElement( [=] __cuda_callable__ ( IndexType i, IndexType& value ) { value = i + 1; } );
+      rowCapacities.forAllElements( [=] __cuda_callable__ ( IndexType i, IndexType& value ) { value = i + 1; } );
       m2.setRowCapacities( rowCapacities );
       auto f2 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) {
          if( localIdx <= row )
@@ -1352,7 +1352,7 @@ void test_VectorProduct()
             column = localIdx;
          }
       };
-      m2.forEachElement( f2 );
+      m2.forAllElements( f2 );
       // check that the matrix was initialized
       TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( rows );
       m2.getCompressedRowLengths( rowLengths );
@@ -1384,7 +1384,7 @@ void test_VectorProduct()
          column = localIdx;
          value = localIdx + 1;
       };
-      m3.forEachElement( f );
+      m3.forAllElements( f );
       TNL::Containers::Vector< double, DeviceType, IndexType > in( columns, 1.0 ), out( rows, 0.0 );
       m3.vectorProduct( in, out );
       EXPECT_EQ( out.getElement( 0 ), ( double ) columns * ( double ) (columns + 1 ) / 2.0 );
@@ -1392,7 +1392,98 @@ void test_VectorProduct()
 }
 
 template< typename Matrix >
-void test_RowsReduction()
+void test_ForElements()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Checks forAllElements by filling the following 8x3 sparse matrix
+    * (three entries per row, each equal to the row index + 1):
+    *    /  1  1  1  \
+    *    |  2  2  2  |
+    *    |  3  3  3  |
+    *    |  4  4  4  |
+    *    |  5  5  5  |
+    *    |  6  6  6  |
+    *    |  7  7  7  |
+    *    \  8  8  8  /
+    */
+
+   const IndexType cols = 3;
+   const IndexType rows = 8;
+
+   Matrix m( { 3, 3, 3, 3, 3, 3, 3, 3 }, cols  );   // one capacity per row: exactly `rows` (8) entries
+   m.forAllElements( [] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIdx, RealType& value, bool compute ) mutable {
+      value = rowIdx + 1.0;
+      columnIdx = localIdx;   // the k-th slot of a row is placed in column k
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+         EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx + 1.0 );
+}
+
+template< typename Matrix >
+void test_ForRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /////
+   // Set up an 8x8 lower triangular matrix: row i gets capacity i + 1
+   const IndexType cols = 8;
+   const IndexType rows = 8;
+
+   /////
+   // Test without iterator: row i receives the value i - j + 1 in column j for every j <= i
+   Matrix m( { 1, 2, 3, 4, 5, 6, 7, 8 }, cols  );   // exactly `rows` capacities, none larger than `cols`
+   using RowView = typename Matrix::RowView;
+   m.forAllRows( [] __cuda_callable__ ( RowView& row ) mutable {
+      for( IndexType localIdx = 0; localIdx <= row.getRowIndex(); localIdx++ )
+      {
+         row.setValue( localIdx, row.getRowIndex() - localIdx + 1.0 );
+         row.setColumnIndex( localIdx, localIdx );
+      }
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+      {
+         if( colIdx <= rowIdx )
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx - colIdx + 1.0 );
+         else
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), 0.0 );
+      }
+
+   ////
+   // Test with iterator: zero the stored values, then refill them through the row's element iterator
+   m.getValues() = 0.0;
+   m.forAllRows( [] __cuda_callable__ ( RowView& row ) mutable {
+      for( auto element : row )
+      {
+         if( element.localIndex() <= element.rowIndex() )   // leave slots past the diagonal untouched
+         {
+            element.value() = element.rowIndex() - element.localIndex() + 1.0;
+            element.columnIndex() = element.localIndex();
+         }
+      }
+   } );
+
+   for( IndexType rowIdx = 0; rowIdx < rows; rowIdx++ )
+      for( IndexType colIdx = 0; colIdx < cols; colIdx++ )
+      {
+         if( colIdx <= rowIdx )
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), rowIdx - colIdx + 1.0 );
+         else
+            EXPECT_EQ( m.getElement( rowIdx, colIdx ), 0.0 );
+      }
+}
+
+template< typename Matrix >
+void test_reduceRows()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -1414,8 +1505,7 @@ void test_RowsReduction()
    const IndexType rows = 8;
    const IndexType cols = 8;
 
-   Matrix m;
-   m.setDimensions( rows, cols );
+   Matrix m( rows, cols );
    typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
    m.setRowCapacities( rowsCapacities );
 
@@ -1461,7 +1551,7 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   m.reduceAllRows( fetch, std::plus<>{}, keep, 0 );
    EXPECT_EQ( rowsCapacities, rowLengths );
    m.getCompressedRowLengths( rowLengths );
    EXPECT_EQ( rowsCapacities, rowLengths );
@@ -1476,7 +1566,7 @@ void test_RowsReduction()
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowSums_view[ rowIdx ] = value;
    };
-   m.allRowsReduction( max_fetch, std::plus<>{}, max_keep, 0 );
+   m.reduceAllRows( max_fetch, std::plus<>{}, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
    EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
index be28b8ecf61b2520e865a9b1cdc0ccf5d2310be5..0186f0776d3301503abb6d707d05cd81fa69a6a0 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
@@ -30,24 +30,24 @@ using ColumnMajorChunkedEllpack = TNL::Algorithms::Segments::ChunkedEllpack< Dev
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
+    ,TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
index d871579b30a4b36c7a92c795c0e421693b936c1d..e8de36e3000182b3a92f1a2958ddec130f98d822 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h
@@ -88,11 +88,11 @@ TYPED_TEST( MatrixTest, vectorProductTest )
     test_VectorProduct< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, rowsReduction )
+TYPED_TEST( MatrixTest, reduceRows )
 {
     using MatrixType = typename TestFixture::MatrixType;
 
-    test_RowsReduction< MatrixType >();
+    test_reduceRows< MatrixType >();
 }
 
 TYPED_TEST( MatrixTest, saveAndLoadTest )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
index 01815e4391eff8a924c72df324654bbce3e3c076..f430fdffabd0604c8f2b539b9bd951044fa86eb8 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp
@@ -858,7 +858,7 @@ void test_VectorProduct()
 }
 
 template< typename Matrix >
-void test_RowsReduction()
+void test_reduceRows()
 {
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
@@ -905,7 +905,7 @@ void test_RowsReduction()
    auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowLengths_view[ rowIdx ] += value;
    };
-   m_5.allRowsReduction( fetch, std::plus<>{}, keep, 0 );
+   m_5.reduceAllRows( fetch, std::plus<>{}, keep, 0 );
 
    EXPECT_EQ( rowLengths_true, rowLengths );
    m_5.getCompressedRowLengths( rowLengths );
@@ -925,7 +925,7 @@ void test_RowsReduction()
    auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
       rowSums_view[ rowIdx ] = value;
    };
-   m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 );
+   m_5.reduceAllRows( max_fetch, max_reduce, max_keep, 0 );
    const RealType maxNorm = TNL::max( rowSums );
    EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36*/
 }
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index a52c7551cc992e2c563e6e6f835af02b13e6f50f..32cf143ad48b2e0435ce0f93721105fd794302f8 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -729,6 +729,86 @@ void test_AddRow()
    EXPECT_EQ( m.getElement( 5, 4 ), 216 );
 }
 
+template< typename Matrix >
+void test_forRows()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /**
+    * Checks forAllRows on a 5x5 matrix of type Matrix constructed from its size only.
+    * Both passes below (with and without iterator) are expected to produce:
+    *
+    * /  1  -2   0   0   0 \
+    * | -2   1  -2   0   0 |
+    * |  0  -2   1  -2   0 |
+    * |  0   0  -2   1  -2 |
+    * \  0   0   0  -2   1 /
+    */
+
+   const IndexType size( 5 );
+   Matrix m( size, size );
+
+   /////
+   // Test without iterator
+   // (set the row entries through their local indices 0, 1, 2)
+   auto f = [=] __cuda_callable__ ( typename Matrix::RowView& row ) mutable {
+      const IndexType rowIdx = row.getRowIndex();
+      if( rowIdx > 0 )   // local index 0 is not written in the first row
+         row.setElement( 0, -2.0 );
+      row.setElement( 1, 1.0 );
+      if( rowIdx < size -1 )   // local index 2 is not written in the last row
+         row.setElement( 2, -2.0 );
+   };
+   m.forAllRows( f );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+      {
+         const IndexType diff = row - column;   // 0: diagonal, 1: column left of it, -1: column right of it
+         if( diff == 0 )
+            EXPECT_EQ( m.getElement( row, column ), 1.0 );
+         else if( diff == 1 && row > 0 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else if( diff == -1 && row < size - 1 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else
+            EXPECT_EQ( m.getElement( row, column ), 0.0 );
+      }
+
+   /////
+   // Test with iterator
+   // (zero the stored values, then assign through the row's element iterator)
+   m.getValues() = 0.0;
+   auto f_iter = [=] __cuda_callable__ ( typename Matrix::RowView& row ) mutable {
+      for( auto element : row )
+      {
+         if( element.rowIndex() > 0 && element.localIndex() == 0 )
+            element.value() = -2.0;
+         if( element.localIndex() == 1 )
+            element.value() = 1.0;
+         if( element.rowIndex() < size - 1 && element.localIndex() == 2 )
+            element.value() = -2.0;
+      }
+   };
+   m.forAllRows( f_iter );
+
+   for( IndexType row = 0; row < size; row++ )
+      for( IndexType column = 0; column < size; column++ )
+      {
+         const IndexType diff = row - column;   // same expectations as in the first pass
+         if( diff == 0 )
+            EXPECT_EQ( m.getElement( row, column ), 1.0 );
+         else if( diff == 1 && row > 0 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else if( diff == -1 && row < size - 1 )
+            EXPECT_EQ( m.getElement( row, column ), -2.0 );
+         else
+            EXPECT_EQ( m.getElement( row, column ), 0.0 );
+      }
+}
+
+
 template< typename Matrix >
 void test_VectorProduct()
 {
@@ -1387,6 +1467,13 @@ TYPED_TEST( MatrixTest, addRowTest )
     test_AddRow< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, forRowsTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+    // Instantiate the templated test_forRows body for this fixture's matrix type.
+    test_forRows< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, vectorProductTest )
 {
     using MatrixType = typename TestFixture::MatrixType;