From b3d86f895ee6e994909b719a08705b0673f84011 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Wed, 8 Jan 2020 18:28:57 +0100 Subject: [PATCH] Updating API of tridiagonal matrix. --- src/TNL/Matrices/Tridiagonal.h | 246 ++++++++-------- src/TNL/Matrices/Tridiagonal.hpp | 484 ++++++++++++++++--------------- 2 files changed, 366 insertions(+), 364 deletions(-) diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 3f57fe1c3e..f80bc4c185 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -13,197 +13,179 @@ #include <TNL/Matrices/Matrix.h> #include <TNL/Containers/Vector.h> #include <TNL/Matrices/TridiagonalRow.h> +#include <TNL/Containers/Segments/Ellpack.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class TridiagonalDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, - typename Index = int > -class Tridiagonal : public Matrix< Real, Device, Index > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > +class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Tridiagonal; + // friend class will be needed for templated assignment operators + template< typename Real2, typename Device2, typename Index2 > + friend class Tridiagonal; -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef TridiagonalRow< Real, Index > MatrixRow; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Tridiagonal< _Real, _Device, _Index >; - Tridiagonal(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; - static String getSerializationType(); + Tridiagonal(); - virtual String getSerializationTypeVirtual() const; + Tridiagonal( const IndexType rows, const IndexType columns ); - void setDimensions( const IndexType rows, - const IndexType columns ); + ViewType getView(); - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + ConstViewType getConstView() const; - IndexType getRowLength( const IndexType row ) const; + static String getSerializationType(); - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; + virtual String getSerializationTypeVirtual() const; - IndexType getMaxRowLength() const; + void setDimensions( const IndexType rows, + const IndexType columns ); - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - IndexType getNumberOfMatrixElements() const; + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getNumberOfNonzeroMatrixElements() const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowlength() const; + IndexType getMaxRowLength() const; - void reset(); + template< typename Real2, typename Device2, typename Index2 > + void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); - template< typename Real2, typename Device2, typename Index2 > - bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getNumberOfMatrixElements() const; - template< typename Real2, typename Device2, typename Index2 > - bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getNumberOfNonzeroMatrixElements() const; - void setValue( const RealType& v ); + IndexType getMaxRowlength() const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); + void reset(); - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const; - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + void setValue( const RealType& v ); - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + RealType getElement( const IndexType row, + const IndexType column ) const; - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - RealType getElement( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + template< typename Function > + void forAllRows( Function& function ) const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + template< typename Function > + void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + template< typename Real2, typename Index2 > + void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - template< typename Real2, typename Index2 > - void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); + template< typename Real2, typename Index2 > + void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - // copy assignment - Tridiagonal& operator=( const Tridiagonal& matrix ); + // copy assignment + Tridiagonal& operator=( const Tridiagonal& matrix ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); - void save( File& file ) const; + void save( File& file ) const; - void load( File& file ); + void load( File& file ); - void save( const String& fileName ) const; + void save( const String& fileName ) const; - void load( const String& fileName ); + void load( const String& fileName ); - void print( std::ostream& str ) const; + void print( std::ostream& str ) const; -protected: + protected: - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; - Containers::Vector< RealType, DeviceType, IndexType > values; + Containers::Vector< RealType, DeviceType, IndexType > values; - typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class TridiagonalDeviceDependentCode< DeviceType >; + typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class TridiagonalDeviceDependentCode< DeviceType >; }; } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Tridiagonal_impl.h> +#include <TNL/Matrices/Tridiagonal.hpp> diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 2752f68503..c36edec0b7 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -15,74 +15,81 @@ #include <TNL/Exceptions/NotImplementedError.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class TridiagonalDeviceDependentCode; template< typename Real, typename Device, - typename Index > -Tridiagonal< Real, Device, Index >::Tridiagonal() -{ -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal() { - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< Real >() ) + - String( ", " ) + - String( Device :: getDeviceType() ) + - String( ", " ) + - String( TNL::getType< Index >() ) + - String( " >" ); } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal( const IndexType rows, const IndexType columns ) { - return this->getType(); + this->setDimensions( rows, columns ); } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationType() { return String( "Matrices::Tridiagonal< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationTypeVirtual() const { return this->getSerializationType(); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); values.setSize( 3*min( rows, columns ) ); - this->values.setValue( 0.0 ); + this->values = 0.0; } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { if( rowLengths[ 0 ] > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); @@ -103,17 +110,12 @@ void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompresse template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const -{ - return this->getRowLengthFast( row ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRowLength( const IndexType row ) const { const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); if( row == 0 ) @@ -129,46 +131,64 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getMaxRowLength() const { return 3; } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Device2, typename Index2 > -void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) { this->setDimensions( m.getRows(), m.getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getNumberOfMatrixElements() const { return 3 * min( this->getRows(), this->getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getNumberOfNonzeroMatrixElements() const { - IndexType nonzeroElements = 0; - for( IndexType i = 0; i < this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0 ) - nonzeroElements++; - return nonzeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > Index -Tridiagonal< Real, Device, Index >:: +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getMaxRowlength() const { return 3; @@ -176,8 +196,12 @@ getMaxRowlength() const template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::reset() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +reset() { Matrix< Real, Device, Index >::reset(); this->values.reset(); @@ -185,48 +209,55 @@ void Tridiagonal< Real, Device, Index >::reset() template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values == matrix.values; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values != matrix.values; + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setValue( const RealType& v ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const { - this->values.setValue( v ); + return ! this->operator==( matrix ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setValue( const RealType& v ) { - this->values[ this->getElementIndex( row, column ) ] = value; - return true; + this->values = v; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) { this->values.setElement( this->getElementIndex( row, column ), value ); return true; @@ -234,159 +265,120 @@ bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) { const Index i = this->getElementIndex( row, column ); - this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); return true; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, const IndexType column ) const { - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRowFast( row, columns, values, elements, 0.0 ); + } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRow( row, columns, values, elements, 0.0 ); -} -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType& column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElementFast( row, column, values[ i ], thisRowMultiplicator ); - } - return true; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElement( row, column, values[ i ], thisRowMultiplicator ); - } - return true; + } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) { - if( abs( column - row ) > 1 ) - return 0.0; - return this->values[ this->getElementIndex( row, column ) ]; } template< typename Real, typename Device, - typename Index > -Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const { - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) { - IndexType elementPointer( 0 ); - for( IndexType i = -1; i <= 1; i++ ) - { - const IndexType column = row + 1; - if( column >= 0 && column < this->getColumns() ) - { - columns[ elementPointer ] = column; - values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; - elementPointer++; - } - } + } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: +typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) { if( std::is_same< Device, Devices::Host >::value ) @@ -403,10 +395,12 @@ getRow( const IndexType rowIndex ) template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -const typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: +const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) const { throw Exceptions::NotImplementedError(); @@ -415,10 +409,12 @@ getRow( const IndexType rowIndex ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row, +typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, const Vector& vector ) const { return TridiagonalDeviceDependentCode< Device >:: @@ -430,10 +426,12 @@ typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename InVector, typename OutVector > -void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), @@ -448,9 +446,11 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -494,9 +494,11 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), @@ -541,10 +543,12 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -561,9 +565,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, // copy assignment template< typename Real, typename Device, - typename Index > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -573,10 +579,12 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) // cross-device copy assignment template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Device2, typename Index2, typename > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -591,8 +599,10 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2 template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( File& file ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); file << this->values; @@ -600,8 +610,10 @@ void Tridiagonal< Real, Device, Index >::save( File& file ) const template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( File& file ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); file >> this->values; @@ -609,24 +621,30 @@ void Tridiagonal< Real, Device, Index >::load( File& file ) template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const { Object::save( fileName ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( const String& fileName ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -640,9 +658,11 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row, +Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, const IndexType column ) const { TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows, @@ -694,7 +714,7 @@ class TridiagonalDeviceDependentCode< Devices::Host > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { @@ -710,7 +730,7 @@ template<> class TridiagonalDeviceDependentCode< Devices::Cuda > { public: - + typedef Devices::Cuda Device; template< typename Index > @@ -747,7 +767,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { -- GitLab