From 873b9a9c296f632b4f87eee8df8bbf801731d9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Fri, 3 Jan 2020 16:25:20 +0100 Subject: [PATCH] Added methods forRows and rowsReduction to dense matrix. --- src/TNL/Matrices/Dense.h | 230 ++++++++++++++++++++----------------- src/TNL/Matrices/Dense.hpp | 126 +++++++++++++++++++- 2 files changed, 247 insertions(+), 109 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index c72b7edfab..553ecc01d3 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -29,156 +29,172 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - // friend class will be needed for templated assignment operators - //template< typename Real2, typename Device2, typename Index2 > - //friend class Dense; + // friend class will be needed for templated assignment operators + //template< typename Real2, typename Device2, typename Index2 > + //friend class Dense; -public: - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - using BaseType = Matrix< Real, Device, Index >; - using ValuesType = typename BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; - using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; - using SegmentViewType = typename SegmentsType::SegmentViewType; - using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = Matrix< Real, Device, Index >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; - // TODO: remove this - using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; - using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Dense< _Real, _Device, _Index >; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; - Dense(); + Dense(); - static String getSerializationType(); + static String getSerializationType(); - virtual String getSerializationTypeVirtual() const; + virtual String getSerializationTypeVirtual() const; - void setDimensions( const IndexType rows, - const IndexType columns ); + void setDimensions( const IndexType rows, + const IndexType columns ); - template< typename Matrix > - void setLike( const Matrix& matrix ); + template< typename Matrix > + void setLike( const Matrix& matrix ); - /**** - * This method is only for the compatibility with the sparse matrices. - */ - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + /**** + * This method is only for the compatibility with the sparse matrices. + */ + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowLength() const; + IndexType getMaxRowLength() const; - IndexType getNumberOfMatrixElements() const; + IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; + IndexType getNumberOfNonzeroMatrixElements() const; - void reset(); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - __cuda_callable__ - const RowView getRow( const IndexType& rowIdx ) const; - __cuda_callable__ - RowView getRow( const IndexType& rowIdx ); + void reset(); + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; - void setValue( const RealType& v ); + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); - __cuda_callable__ - Real& operator()( const IndexType row, - const IndexType column ); - __cuda_callable__ - const Real& operator()( const IndexType row, - const IndexType column ) const; + void setValue( const RealType& v ); - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + __cuda_callable__ + Real& operator()( const IndexType row, + const IndexType column ); - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + __cuda_callable__ + const Real& operator()( const IndexType row, + const IndexType column ) const; - Real getElement( const IndexType row, - const IndexType column ) const; + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); - /*__cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const;*/ + Real getElement( const IndexType row, + const IndexType column ) const; - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - template< typename InVector, typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - template< typename Matrix > - void addMatrix( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - template< typename Matrix1, typename Matrix2, int tileDim = 32 > - void getMatrixProduct( const Matrix1& matrix1, - const Matrix2& matrix2, - const RealType& matrix1Multiplicator = 1.0, - const RealType& matrix2Multiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - template< typename Matrix, int tileDim = 32 > - void getTransposition( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0 ); + template< typename Function > + void forAllRows( Function& function ) const; - template< typename Vector1, typename Vector2 > - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + template< typename Function > + void forAllRows( Function& function ); - // copy assignment - Dense& operator=( const Dense& matrix ); + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename InVector, typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - void save( const String& fileName ) const; + template< typename Matrix > + void addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - void load( const String& fileName ); + template< typename Matrix1, typename Matrix2, int tileDim = 32 > + void getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator = 1.0, + const RealType& matrix2Multiplicator = 1.0 ); - void save( File& file ) const; + template< typename Matrix, int tileDim = 32 > + void getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0 ); - void load( File& file ); + template< typename Vector1, typename Vector2 > + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - void print( std::ostream& str ) const; + // copy assignment + Dense& operator=( const Dense& matrix ); -protected: + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void save( File& file ) const; + + void load( File& file ); + + void print( std::ostream& str ) const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; - typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class DenseDeviceDependentCode< DeviceType >; + typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class DenseDeviceDependentCode< DeviceType >; - SegmentsType segments; + SegmentsType segments; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index bd0614ad08..680fa3ed28 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + template< typename Real, typename Device, typename Index, @@ -256,12 +281,109 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row, - const IndexType column ) const +Real +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, + const IndexType column ) const { return this->values.getElement( this->getElementIndex( row, column ) ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto values_view = this->values.getConstView(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + template< typename Real, typename Device, typename Index, -- GitLab