diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 9ab2186c36f01fc8c4806659ab01ce84bb17ee63..83da548fc9eeefbea54c4d45883bd21cbe33f632 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -218,14 +218,15 @@ void
 CSR< Device, Index, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
-      for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args... ) );
+      bool compute( true );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp
index f4f59370d46a5cbcb8502ff5e30d181397c382ca..b4304ee321446ddec2074dba7bd0e8bf48f2c57b 100644
--- a/src/TNL/Containers/Segments/CSRView.hpp
+++ b/src/TNL/Containers/Segments/CSRView.hpp
@@ -204,14 +204,15 @@ void
 CSRView< Device, Index >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       RealType aux( zero );
-      for( IndexType j = begin; j < end; j++  )
-         reduction( aux, fetch( i, j, args... ) );
+      bool compute( true );
+      for( IndexType j = begin; j < end && compute; j++  )
+         reduction( aux, fetch( i, j, compute, args... ) );
       keeper( i, aux );
    };
    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
index 9f7702a6f2e071cfd255a2c69ed20b6f9a9b2856..ebc2b360eb91a4100db7a1d6599b33536c74f4b6 100644
--- a/src/TNL/Containers/Segments/Ellpack.hpp
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -306,31 +306,32 @@ void
 Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType segmentSize = this->segmentSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j++  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j++  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j += alignedSize  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j += alignedSize  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp
index f5dba4f3d7fb65c5e144de97fde23f58a2893505..dc6bd485dd3faf5710268f590385037af4a3f521 100644
--- a/src/TNL/Containers/Segments/EllpackView.hpp
+++ b/src/TNL/Containers/Segments/EllpackView.hpp
@@ -245,31 +245,32 @@ void
 EllpackView< Device, Index, RowMajorOrder, Alignment >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    if( RowMajorOrder )
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType segmentSize = this->segmentSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j++  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j++  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
    }
    else
    {
-      using RealType = decltype( fetch( IndexType(), IndexType() ) );
       const IndexType storageSize = this->getStorageSize();
       const IndexType alignedSize = this->alignedSize;
       auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
          const IndexType begin = i;
          const IndexType end = storageSize;
          RealType aux( zero );
-         for( IndexType j = begin; j < end; j += alignedSize  )
-            reduction( aux, fetch( i, j, args... ) );
+         bool compute( true );
+         for( IndexType j = begin; j < end && compute; j += alignedSize  )
+            reduction( aux, fetch( i, j, compute, args... ) );
          keeper( i, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index e2aec924d58ef22c5c896bc815506a3e468b68de..ecd32abb25cd08997d389c087bbb039507f4718e 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes )
    const auto sizes_view = sizes.getConstView();
    auto slices_view = this->sliceOffsets.getView();
    auto slice_segment_size_view = this->sliceSegmentSizes.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       if( globalIdx < _size )
          return sizes_view[ globalIdx ];
       return 0;
@@ -341,7 +341,7 @@ void
 SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -353,8 +353,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -368,8 +369,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
index 139a09a15e515f1a5ffe83cbd4f3c5f2c8933490..41b49ed150459376067eb36e3a1b4b1616bd37c5 100644
--- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -247,8 +247,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
-            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -262,8 +263,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          IndexType localIdx( 0 );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
-            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) )
                break;
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -291,7 +293,7 @@ void
 SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
@@ -303,8 +305,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
          const IndexType end = begin + segmentSize;
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -318,8 +321,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
          const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
          const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
          RealType aux( zero );
-         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
-            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         bool compute( true );
+         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
          keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index c0dd3b9a3ecd9623177374f81b39bad96e3f7528..691157a9c467abb378017338110b986ecedcea77 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -628,9 +628,10 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ offset ];
-      if( column == paddingIndex )
+      compute = ( column != paddingIndex );
+      if( ! compute )
          return 0.0;
       return valuesView[ offset ] * inVectorView[ column ];
    };
@@ -658,7 +659,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
          return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 5ac494a9b6a593fed8f7c8580f6169bb4eb0c65a..ce0e7aa1818a743a6b11816ee2b2904e8c92cfb9 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -508,9 +508,10 @@ vectorProduct( const InVector& inVector,
    const auto valuesView = this->values.getConstView();
    const auto columnIndexesView = this->columnIndexes.getConstView();
    const IndexType paddingIndex = this->getPaddingIndex();
-   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType {
+   auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType {
       const IndexType column = columnIndexesView[ offset ];
-      if( column == paddingIndex )
+      compute = ( column != paddingIndex );
+      if( ! compute )
          return 0.0;
       return valuesView[ offset ] * inVectorView[ column ];
    };
diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
index 5e74f96b039b23102908c772f576b9592c1bb079..6189c2e9a4b3c3d81ef1be5466cd79b6d746f188 100644
--- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp
+++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp
@@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments()
 
    const auto v_view = v.getConstView();
    auto result_view = result.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       return v_view[ globalIdx ];
    };
    auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) {