diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h
index f274200b223fce4f7b369443bf71446677e6ee20..97b046fba6d37ba1a1633e6aad3c29c77745f8c4 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/CSR.h
@@ -63,6 +63,9 @@ class CSR : public Sparse< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const CSR< Real2, Device2, Index2 >& matrix );
 
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 3563fa789e21554f819ecf3183a0092377893e0c..abc4e282312549d2d955c9d682dc3e757766ecb4 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -117,6 +117,15 @@ template< typename Real,
           typename Device,
           typename Index >
 Index CSR< Real, Device, Index >::getRowLength( const IndexType row ) const
+{
+   return this->rowPointers.getElement( row + 1 ) - this->rowPointers.getElement( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
 {
    return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
 }
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h
index dace4893cc3225c8f5d431a3097c4c16e76055ea..c86081216eb5e180664ae8e32fb4e3217b8c23db 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/ChunkedEllpack.h
@@ -92,6 +92,9 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix );
 
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 3840384b90fe6ecb4951df6ddc4b7dd45d7f90a4..bd94540b79e43bff987e4898b06e063cd233716d 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -262,9 +262,21 @@ template< typename Real,
           typename Index >
 Index ChunkedEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const
 {
-   const IndexType& sliceIndex = rowToSliceMapping[ row ];
+   const IndexType& sliceIndex = rowToSliceMapping.getElement( row );
    TNL_ASSERT( sliceIndex < this->rows, );
    const IndexType& chunkSize = slices.getElement( sliceIndex ).chunkSize;
+   return rowPointers.getElement( row + 1 ) - rowPointers.getElement( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index ChunkedEllpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   const IndexType& sliceIndex = rowToSliceMapping[ row ];
+   TNL_ASSERT( sliceIndex < this->rows, );
+   const IndexType& chunkSize = slices[ sliceIndex ].chunkSize;
    return rowPointers[ row + 1 ] - rowPointers[ row ];
 }
 
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index bd5daa284dd716dae3462cfab6e66edee37a4820..5d94b5bda6c711a9c16e4e135285b8cf9677a886 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -66,6 +66,9 @@ class Dense : public Matrix< Real, Device, Index >
     */
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    IndexType getMaxRowLength() const;
 
    IndexType getNumberOfMatrixElements() const;
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h
index 55282e8d6228d246e547de5e5f5623cbf327eddf..08f66239c3b83a4ae60ee228a11a928a44f75445 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense_impl.h
@@ -99,6 +99,15 @@ Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const
    return this->getColumns();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   return this->getColumns();
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h
index 65f06a550ecab5765f64b9e39e7a28617fcb9808..4e1b92b9497d4fca7b1736261b700272813a427c 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Ellpack.h
@@ -56,6 +56,9 @@ class Ellpack : public Sparse< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const Ellpack< Real2, Device2, Index2 >& matrix );
 
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index 9f12aee4c9393ad3560f2dc7e07929f5844a56ac..ca23f4168d10b8ce73eeac1aca3996028a95c8c8 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -113,6 +113,15 @@ Index Ellpack< Real, Device, Index >::getRowLength( const IndexType row ) const
    return this->rowLengths;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Ellpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   return this->rowLengths;
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 058c05217efe04f0feb3a558d8f6eff3920ab6d6..884e804ec30a5c4948cd9ee03cc710d571eae838 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -53,6 +53,9 @@ class Multidiagonal : public Matrix< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    IndexType getMaxRowLength() const;
 
    template< typename Vector >
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h
index 91588a515e9f12c4b3e6849b569e9a9c689202d2..ea8c4050db60117af1cc720088d520a9ca849187 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Multidiagonal_impl.h
@@ -105,6 +105,22 @@ Index Multidiagonal< Real, Device, Index >::getRowLength( const IndexType row )
    return rowLength;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Multidiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   IndexType rowLength( 0 );
+   for( IndexType i = 0; i < diagonalsShift.getSize(); i++ )
+   {
+      const IndexType column = row + diagonalsShift[ i ];
+      if( column >= 0 && column < this->getColumns() )
+         rowLength++;
+   }
+   return rowLength;
+}
+
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h
index ef1d542bf30d9402a2d96b06eb48354ca990d813..2bd6e762d7be611180fbf5d3c77d22ed8910622b 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/SlicedEllpack.h
@@ -84,6 +84,9 @@ class SlicedEllpack : public Sparse< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    template< typename Real2, typename Device2, typename Index2 >
    void setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
 
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h
index 0f22e69b9c52fa6cb9336cff16a9328de68c5d32..bcbed7d986038afae15e7ffd3de330ccfca7c7a1 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/SlicedEllpack_impl.h
@@ -108,6 +108,17 @@ Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLength( const Index
    return this->sliceCompressedRowLengths.getElement( slice );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+__cuda_callable__
+Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLengthFast( const IndexType row ) const
+{
+   const IndexType slice = row / SliceSize;
+   return this->sliceCompressedRowLengths[ slice ];
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 396ea25192e02195ca973516d8ff64dd4c8335ea..5ebd526231d52e8fd9f6f69479f499af8158b797 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -54,6 +54,9 @@ class Tridiagonal : public Matrix< Real, Device, Index >
 
    IndexType getRowLength( const IndexType row ) const;
 
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
    IndexType getMaxRowLength() const;
 
    template< typename Real2, typename Device2, typename Index2 >
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h
index c1239627da2d8d8dd7351a75b9b77f6c8fdb6b5d..fb070c7f09c06482e1a8883a4da5a41e1eb1bdf0 100644
--- a/src/TNL/Matrices/Tridiagonal_impl.h
+++ b/src/TNL/Matrices/Tridiagonal_impl.h
@@ -98,6 +98,15 @@ template< typename Real,
           typename Device,
           typename Index >
 Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+{
+   return this->getRowLengthFast( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
 {
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( row == 0 )