diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index bbed8e3cb032c61a2e8add669012b8d21f8b720a..2d2b5833172f66fd9cf673d8ca357cf7b5c6f265 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -195,15 +195,16 @@ void
 CSRView< Device, Index >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
+      IndexType localIdx( 0 );
       bool compute( true );
       for( IndexType j = begin; j < end && compute; j++  )
-         reduction( aux, fetch( i, j, compute, args... ) );
+         reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index dc6bd485dd3faf5710268f590385037af4a3f521..21be8865419440897c371803b76e17c6e47cc3d2 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -245,7 +245,7 @@ void
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
       const IndexType segmentSize = this->segmentSize;
@@ -253,9 +253,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType j = begin; j < end && compute; j++  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -268,9 +269,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType j = begin; j < end && compute; j += alignedSize  )
-            reduction( aux, fetch( i, j, compute, args... ) );
+            reduction( aux, fetch( i, localIdx++, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 82570664fae64a181523ffe9a79fcf87624d2d17..5f9cbdee3e26ad9b7c8e9a71902fc84c77210de3 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -293,7 +293,7 @@ void
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -305,9 +305,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -321,9 +322,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
+         IndexType localIdx( 0 );
          bool compute( true );
          for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
+            reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 6aa75995f55f7080f1c63af7d45a83a5dec2b917..8dbe53f4dc11b2e7fdfaab4fc30354fa15ce8094 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -417,12 +417,12 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
-      const IndexType column = columnIndexesView[ offset ];
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType {
+      const IndexType column = columnIndexesView[ globalIdx ];
       compute = ( column != paddingIndex );
       if( ! compute )
          return 0.0;
-      return valuesView[ offset ] * inVectorView[ column ];
+      return valuesView[ globalIdx ] * inVectorView[ column ];
    };
    auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
       sum += value;
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 6189c2e9a4b3c3d81ef1be5466cd79b6d746f188..8320fafe57455190780fa25b6d16b6b463df2eb6 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments()
 
    const auto v_view = v.getConstView();
    auto result_view = result.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       return v_view[ globalIdx ];
    };
    auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index bf261aa8464d3dd2f36a71e4cf34dd5de352ab94..b0a9fcb000302910abd7fb7a8c2265d1f44f028e 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -1214,7 +1214,7 @@ void test_RowsReduction()
    // Compute number of non-zero elements in rows.
    typename Matrix::RowsCapacitiesType rowLengths( rows );
    auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return ( value != 0.0 );
    };
    auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
@@ -1232,7 +1232,7 @@ void test_RowsReduction()
    // Compute max norm
    TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
    auto rowSums_view = rowSums.getView();
-   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
       return abs( value );
    };
    auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {