diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index c72b7edfab14917923996147287b6afac440f7b3..553ecc01d3fa146ddaae0137475179dca0b1c779 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -29,156 +29,172 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   private:
+      // convenient template alias for controlling the selection of copy-assignment operator
+      template< typename Device2 >
+      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
 
-   // friend class will be needed for templated assignment operators
-   //template< typename Real2, typename Device2, typename Index2 >
-   //friend class Dense;
+      // friend class will be needed for templated assignment operators
+      //template< typename Real2, typename Device2, typename Index2 >
+      //friend class Dense;
 
-public:
-   using RealType = Real;
-   using DeviceType = Device;
-   using IndexType = Index;
-   using BaseType = Matrix< Real, Device, Index >;
-   using ValuesType = typename BaseType::ValuesVector;
-   using ValuesViewType = typename ValuesType::ViewType;
-   using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
-   using SegmentViewType = typename SegmentsType::SegmentViewType;
-   using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = Matrix< Real, Device, Index >;
+      using ValuesType = typename BaseType::ValuesVector;
+      using ValuesViewType = typename ValuesType::ViewType;
+      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >;
+      using SegmentViewType = typename SegmentsType::SegmentViewType;
+      using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >;
 
-   // TODO: remove this
-   using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
-   using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
+      // TODO: remove this
+      using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector;
+      using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
 
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Dense< _Real, _Device, _Index >;
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index >
+      using Self = Dense< _Real, _Device, _Index >;
 
-   Dense();
+      Dense();
 
-   static String getSerializationType();
+      static String getSerializationType();
 
-   virtual String getSerializationTypeVirtual() const;
+      virtual String getSerializationTypeVirtual() const;
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   template< typename Matrix >
-   void setLike( const Matrix& matrix );
+      template< typename Matrix >
+      void setLike( const Matrix& matrix );
 
-   /****
-    * This method is only for the compatibility with the sparse matrices.
-    */
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      /****
+       * This method is only for the compatibility with the sparse matrices.
+       */
+      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
 
-   [[deprecated]]
-   IndexType getRowLength( const IndexType row ) const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   IndexType getMaxRowLength() const;
+      IndexType getMaxRowLength() const;
 
-   IndexType getNumberOfMatrixElements() const;
+      IndexType getNumberOfMatrixElements() const;
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   void reset();
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   __cuda_callable__
-   const RowView getRow( const IndexType& rowIdx ) const;
 
-   __cuda_callable__
-   RowView getRow( const IndexType& rowIdx );
+      void reset();
 
+      __cuda_callable__
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   void setValue( const RealType& v );
+      __cuda_callable__
+      RowView getRow( const IndexType& rowIdx );
 
-   __cuda_callable__
-   Real& operator()( const IndexType row,
-                     const IndexType column );
 
-   __cuda_callable__
-   const Real& operator()( const IndexType row,
-                           const IndexType column ) const;
+      void setValue( const RealType& v );
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      __cuda_callable__
+      Real& operator()( const IndexType row,
+                        const IndexType column );
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      __cuda_callable__
+      const Real& operator()( const IndexType row,
+                              const IndexType column ) const;
 
-   Real getElement( const IndexType row,
-                    const IndexType column ) const;
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   /*__cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;*/
+      Real getElement( const IndexType row,
+                       const IndexType column ) const;
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   template< typename InVector, typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   template< typename Matrix >
-   void addMatrix( const Matrix& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   template< typename Matrix1, typename Matrix2, int tileDim = 32 >
-   void getMatrixProduct( const Matrix1& matrix1,
-                       const Matrix2& matrix2,
-                       const RealType& matrix1Multiplicator = 1.0,
-                       const RealType& matrix2Multiplicator = 1.0 );
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   template< typename Matrix, int tileDim = 32 >
-   void getTransposition( const Matrix& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   template< typename Vector1, typename Vector2 >
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   // copy assignment
-   Dense& operator=( const Dense& matrix );
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      template< typename InVector, typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   void save( const String& fileName ) const;
+      template< typename Matrix >
+      void addMatrix( const Matrix& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   void load( const String& fileName );
+      template< typename Matrix1, typename Matrix2, int tileDim = 32 >
+      void getMatrixProduct( const Matrix1& matrix1,
+                          const Matrix2& matrix2,
+                          const RealType& matrix1Multiplicator = 1.0,
+                          const RealType& matrix2Multiplicator = 1.0 );
 
-   void save( File& file ) const;
+      template< typename Matrix, int tileDim = 32 >
+      void getTransposition( const Matrix& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   void load( File& file );
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   void print( std::ostream& str ) const;
+      // copy assignment
+      Dense& operator=( const Dense& matrix );
 
-protected:
+      // cross-device copy assignment
+      template< typename Real2, typename Device2, typename Index2,
+                typename = typename Enabler< Device2 >::type >
+      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+      void save( const String& fileName ) const;
+
+      void load( const String& fileName );
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType column ) const;
 
-   typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class DenseDeviceDependentCode< DeviceType >;
+      typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
+      friend class DenseDeviceDependentCode< DeviceType >;
 
-   SegmentsType segments;
+      SegmentsType segments;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index bd0614ad08fcf6c62cb78be5612ace5f9b0e448c..680fa3ed284446293e4d3e671f810f4d29738baa 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
    this->setDimensions( rowLengths.getSize(), max( rowLengths ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Vector >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getCompressedRowLengths( Vector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   rowLengths = 0;
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   this->allRowsReduction( fetch, reduce, keep, 0 );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -256,12 +281,109 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row,
-                                                        const IndexType column ) const
+Real 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElement( const IndexType row,
+            const IndexType column ) const
 {
    return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchValue >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return zero;
+   };
+   this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   auto values_view = this->values.getView();
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
+      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      return true;
+   };
+   this->segments.forSegments( first, last, f );
+
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+   template< typename Function >
+void
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->getRows(), function );
+}
+
 template< typename Real,
           typename Device,
           typename Index,