Loading src/TNL/Matrices/Dense.h +123 −107 Original line number Diff line number Diff line Loading @@ -84,6 +84,10 @@ public: IndexType getNumberOfNonzeroMatrixElements() const; template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; void reset(); __cuda_callable__ Loading Loading @@ -115,11 +119,23 @@ public: Real getElement( const IndexType row, const IndexType column ) const; /*__cuda_callable__ MatrixRow getRow( const IndexType rowIndex ); template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; __cuda_callable__ const MatrixRow getRow( const IndexType rowIndex ) const;*/ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; template< typename Function > void forRows( IndexType first, IndexType last, Function& function ); template< typename Function > void forAllRows( Function& function ) const; template< typename Function > void forAllRows( Function& function ); template< typename Vector > __cuda_callable__ Loading src/TNL/Matrices/Dense.hpp +124 −2 Original line number Diff line number Diff line Loading @@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Vector > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { aux += a; }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { rowLengths_view[ rowIdx ] = value; }; this->allRowsReduction( fetch, reduce, keep, 0 ); } template< typename Real, typename Device, typename Index, Loading Loading @@ -256,12 +281,109 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row, Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->values.getElement( this->getElementIndex( row, column ) ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto values_view = this->values.getConstView(); auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); return zero; }; this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) const { this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) { this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, Loading Loading
src/TNL/Matrices/Dense.h +123 −107 Original line number Diff line number Diff line Loading @@ -84,6 +84,10 @@ public: IndexType getNumberOfNonzeroMatrixElements() const; template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; void reset(); __cuda_callable__ Loading Loading @@ -115,11 +119,23 @@ public: Real getElement( const IndexType row, const IndexType column ) const; /*__cuda_callable__ MatrixRow getRow( const IndexType rowIndex ); template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; __cuda_callable__ const MatrixRow getRow( const IndexType rowIndex ) const;*/ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; template< typename Function > void forRows( IndexType first, IndexType last, Function& function ); template< typename Function > void forAllRows( Function& function ) const; template< typename Function > void forAllRows( Function& function ); template< typename Vector > __cuda_callable__ Loading
src/TNL/Matrices/Dense.hpp +124 −2 Original line number Diff line number Diff line Loading @@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Vector > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { aux += a; }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { rowLengths_view[ rowIdx ] = value; }; this->allRowsReduction( fetch, reduce, keep, 0 ); } template< typename Real, typename Device, typename Index, Loading Loading @@ -256,12 +281,109 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row, Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->values.getElement( this->getElementIndex( row, column ) ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto values_view = this->values.getConstView(); auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); return zero; }; this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) const { this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Function > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) { this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, typename Index, Loading