diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index d2827015695da2183c16c152e10d1d711149b51e..e7e3ab6b279ce53f9db662fc58efd8b53109f4b4 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -50,13 +50,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > typename _Index = Index > using Self = Tridiagonal< _Real, _Device, _Index >; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + Tridiagonal(); Tridiagonal( const IndexType rows, const IndexType columns ); - ViewType getView(); + ViewType getView() const; // TODO: remove const - ConstViewType getConstView() const; + //ConstViewType getConstView() const; static String getSerializationType(); @@ -168,6 +170,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void print( std::ostream& str ) const; + const IndexerType& getIndexer() const; + + IndexerType& getIndexer(); + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index c6d359d3be42c618eaf28c33a0b320f5d0dee1f7..6c09238ff0628b06584007d4336631d4b28650f9 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -49,12 +49,13 @@ template< typename Real, typename RealAllocator > auto Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getView() -> ViewType +getView() const -> ViewType { - return ViewType( this->values.getView(), indexer ); + // TODO: fix when getConstView works + return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer ); } -template< typename Real, +/*template< typename Real, typename Device, typename Index, bool RowMajorOrder, @@ -64,7 +65,7 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), indexer ); -} +}*/ template< typename Real, typename Device, @@ -146,19 +147,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { return this->view.getCompressedRowLengths( rowLengths ); - /*rowLengths.setSize( this->getRows() ); - rowLengths = 0; - auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { - return ( value != 0.0 ); - }; - auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; - }; - this->allRowsReduction( fetch, reduce, keep, 0 );*/ } template< typename Real, @@ -171,7 +159,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRowLength( const IndexType row ) const { return this->view.getRowLength( row ); - //return this->indexer.getRowSize( row ); } template< typename Real, @@ -209,11 +196,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getNumberOfNonzeroMatrixElements() const { return this->view.getNumberOfNonzeroMatrixElements(); - /*const auto values_view = this->values.getConstView(); - auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { - return ( values_view[ i ] != 0.0 ); - }; - return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/ } template< typename Real, @@ -283,7 +265,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); - //return RowView( this->values.getView(), this->indexer ); } template< typename Real, @@ -297,7 +278,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); - //return RowView( this->values.getView(), this->indexer ); } template< typename Real, @@ -310,18 +290,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { return this->view.setElement( row, column, value ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) - { - std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; - throw std::logic_error( msg.str() ); - } - this->values.setElement( this->getElementIndex( row, column ), value ); - return true;*/ } template< typename Real, @@ -337,19 +305,6 @@ addElement( const IndexType row, const RealType& thisElementMultiplicator ) { return this->view.addElement( row, column, value, thisElementMultiplicator ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) - { - std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; - throw std::logic_error( msg.str() ); - } - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true;*/ } template< typename Real, @@ -362,14 +317,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->view.getElement( row, column ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) );*/ } template< typename Real, @@ -383,39 +330,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); - /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); - const auto values_view = this->values.getConstView(); - const auto indexer = this->indexer; - const auto zero = zero_; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - Real_ sum( zero ); - if( rowIdx == 0 ) - { - reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); - reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); - keep( 0, sum ); - return; - } - if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) - { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); - keep( rowIdx, sum ); - return; - } - if( indexer.getRows() == indexer.getColumns() ) - { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - keep( rowIdx, sum ); - } - else - { - keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -442,45 +356,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); - /*const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; - if( rowIdx == 0 ) - { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; - } - if( rows == columns ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - } - else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -494,45 +369,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, last, function ); - /*const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; - if( rowIdx == 0 ) - { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; - } - if( rows == columns ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - } - else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -573,11 +409,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct(); - /*return TridiagonalDeviceDependentCode< Device >:: - rowVectorProduct( this->rows, - this->values, - row, - vector );*/ } template< typename Real, @@ -592,14 +423,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { this->view.vectorProduct( inVector, outVector ); - /*TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl );*/ - - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, @@ -614,14 +437,7 @@ addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix columns: " << this->getColumns() << std::endl - << "This matrix rows: " << this->getRows() << std::endl ); - - if( thisMatrixMultiplicator == 1.0 ) - this->values += matrixMultiplicator * matrix.values; - else - this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; + this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator ); } #ifdef HAVE_CUDA @@ -753,11 +569,31 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca "unknown device" ); this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet."); + if( RowMajorOrder == RowMajorOrder_ ) + this->values = matrix.getValues(); + else + { + if( std::is_same< Device, Device_ >::value ) + { + const auto matrix_view = matrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + else + { + Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; + auxMatrix = matrix; + const auto matrix_view = auxMatrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + } } - template< typename Real, typename Device, typename Index, @@ -777,6 +613,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil { Matrix< Real, Device, Index >::load( file ); this->indexer.setDimensions( this->getRows(), this->getColumns() ); + this->view = this->getView(); } template< typename Real, @@ -804,7 +641,9 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -816,6 +655,30 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( st } } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 78593acf511153ab4051be71f5895210cdafb93a..29006279390150cbd80a469c8ffed124ca67be17 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -146,6 +146,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > void print( std::ostream& str ) const; + __cuda_callable__ + const IndexerType& getIndexer() const; + + __cuda_callable__ + IndexerType& getIndexer(); + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 83ff6035dd36b628f02e0ac4d324b890dfb05b82..4d4950c4e92be8568219f0102371b0d9e86b0979 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -297,7 +297,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke keep( 0, sum ); return; } - if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) + if( rowIdx + 1 < indexer.getColumns() ) { reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); @@ -305,7 +305,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke keep( rowIdx, sum ); return; } - if( indexer.getRows() == indexer.getColumns() ) + if( rowIdx < indexer.getColumns() ) { reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); @@ -313,7 +313,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke } else { - keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); @@ -328,7 +328,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); + this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero ); } template< typename Real, @@ -341,42 +341,26 @@ TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; + const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); } - if( rows == columns ) + else if( rowIdx < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); } else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -390,43 +374,27 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) { - const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; + auto values_view = this->values.getView(); + const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); } - if( rows == columns ) + else if( rowIdx < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); } else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -440,7 +408,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) const { - this->forRows( 0, this->getRows(), function ); + this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function ); } template< typename Real, @@ -452,7 +420,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) { - this->forRows( 0, this->getRows(), function ); + this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); } template< typename Real, @@ -477,14 +445,22 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { + return value * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, @@ -498,18 +474,41 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ > const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix columns: " << this->getColumns() << std::endl - << "This matrix rows: " << this->getRows() << std::endl ); + TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); + TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." ); - if( thisMatrixMultiplicator == 1.0 ) - this->values += matrixMultiplicator * matrix.values; + if( RowMajorOrder == RowMajorOrder_ ) + { + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.getValues(); + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues(); + } else - this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; + { + const auto matrix_view = matrix; + const auto matrixMult = matrixMultiplicator; + const auto thisMult = thisMatrixMultiplicator; + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + if( thisMult == 0.0 ) + this->forAllRows( add0 ); + else if( thisMult == 1.0 ) + this->forAllRows( add1 ); + else + this->forAllRows( addGen ); + } } #ifdef HAVE_CUDA -template< typename Real, +/*template< typename Real, typename Real2, typename Index, typename Index2 > @@ -533,7 +532,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De rowIdx, matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); } -} +}*/ #endif template< typename Real, @@ -563,7 +562,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Tridiagonal* kernel_this = Cuda::passToDevice( *this ); + /*Tridiagonal* kernel_this = Cuda::passToDevice( *this ); typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); @@ -581,7 +580,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, } Cuda::freeFromDevice( kernel_this ); Cuda::freeFromDevice( kernel_inMatrix ); - TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE;*/ #endif } } @@ -644,6 +643,30 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os } } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h index d9fdd0c23cc5bc93ea58b62f31130b9a3674adff..6d3377b4f938399452230d3e5a3739d26e4e755f 100644 --- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -65,7 +65,7 @@ class TridiagonalMatrixIndexer const IndexType& getColumns() const { return this->columns; }; __cuda_callable__ - const IndexType& getSize() const { return this->nonEmptyRows; }; + const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; }; __cuda_callable__ IndexType getStorageSize() const { return 3 * this->nonEmptyRows; }; diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 28749540556121c826e5de3457ddad9f3996201f..4b95380c4477b33184e6c80946d9f0aebd95e04c 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -13,8 +13,8 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) +# CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -42,9 +42,9 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) +# ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) +# TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) @@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index dcd14302a446b32171574bf805921ba4b63d322a..2c476670b7ecffd7c985475d5f154acefcc2d4d8 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -8,6 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ +#include <sstream> #include <TNL/Devices/Host.h> #include <TNL/Matrices/Matrix.h> #include <TNL/Matrices/Tridiagonal.h> @@ -774,8 +775,11 @@ void test_VectorProduct() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) + { if( abs( i - j ) <= 1 ) - m.setElement( i, j, value++ ); + m.setElement( i, j, value ); + value++; + } using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; @@ -787,7 +791,6 @@ void test_VectorProduct() m.vectorProduct( inVector, outVector); - std::cerr << outVector << std::endl; EXPECT_EQ( outVector.getElement( 0 ), 6 ); EXPECT_EQ( outVector.getElement( 1 ), 36 ); EXPECT_EQ( outVector.getElement( 2 ), 66 ); @@ -795,122 +798,123 @@ void test_VectorProduct() EXPECT_EQ( outVector.getElement( 4 ), 40 ); } -template< typename Matrix > +template< typename Matrix1, typename Matrix2 = Matrix1 > void test_AddMatrix() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix1 m( rows, cols ); -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - Matrix m2; - m2.reset(); - m2.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 0 0 \ + * | 3 4 5 0 | + * | 0 6 7 8 | + * | 0 0 9 10 | + * \ 0 0 0 11 / + */ + Matrix2 m2( rows, cols ); - RealType newValue = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + if( abs( i - j ) <= 1 ) m2.setElement( i, j, newValue++ ); - /* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ + /* + * Compute the following 5x4 dense matrix: + * + * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ + * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | + * | 0 10 11 12 | + 2 * | 0 6 7 8 | = | 0 22 25 28 | + * | 0 0 15 16 | | 0 0 9 10 | | 0 0 33 36 | + * \ 0 0 0 20 / \ 0 0 0 11 / \ 0 0 0 42 / + */ - Matrix mResult; - mResult.reset(); - mResult.setDimensions( rows, cols ); - - mResult = m; - - RealType matrixMultiplicator = 2; - RealType thisMatrixMultiplicator = 1; - - mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); - EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); - EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); - EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); - EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); - EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); - EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); - EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); - EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); - EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); - EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); - EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); - EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); - EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); - EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); - EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); - EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); - EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); - EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); - EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); - EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); - EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); - EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); - EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); - EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); - EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); - EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); - EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); - EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); - EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); - EXPECT_EQ( mResult.getElement( 4, 3 ), 60 ); + Matrix1 mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 11 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 14 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 17 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 22 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 25 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 28 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 42 ); } template< typename Matrix > @@ -1162,43 +1166,44 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - hostMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + hostMatrix.setElement( i, j, i + j ); Matrix matrix( rows, columns ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) - { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else - EXPECT_EQ( matrix.getElement( i, j ), i + j ); - } + if( abs( i - j ) <= 1 ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); #ifdef HAVE_CUDA TridiagonalCuda cudaMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - cudaMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; matrix = cudaMatrix; - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j < rows; j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else + if( abs( i - j ) <= 1 ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } #endif } @@ -1207,123 +1212,125 @@ void test_AssignmentOperator() template< typename Matrix > void test_SaveAndLoad() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 4x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * \ 13 14 15 16 / - */ - const IndexType rows = 4; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix savedMatrix; - savedMatrix.reset(); - savedMatrix.setDimensions( rows, cols ); + /* + * Sets up the following 4x4 dense matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * \ 0 0 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - savedMatrix.setElement( i, j, value++ ); - - ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - - Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( rows, cols ); - - ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); + Matrix savedMatrix( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + savedMatrix.setElement( i, j, value ); + value++; + } + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); } template< typename Matrix > void test_Print() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - #include <sstream> - std::stringstream printed; - std::stringstream couted; + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + std::stringstream printed; + std::stringstream couted; - m.print( std::cout ); //all the std::cout goes to ss + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); - std::cout.rdbuf(old_buf); //reset + m.print( std::cout ); //all the std::cout goes to ss - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" - "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" - "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" - "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" - "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" + "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" + "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" + "Row: 3 -> Col:2->15\t Col:3->16\t\n" + "Row: 4 -> Col:3->20\t\n"; - EXPECT_EQ( printed.str(), couted.str() ); + EXPECT_EQ( printed.str(), couted.str() ); } // test fixture for typed tests @@ -1470,6 +1477,19 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) +{ + using MatrixType = typename TestFixture::MatrixType; + + using RealType = typename MatrixType::RealType; + using DeviceType = typename MatrixType::DeviceType; + using IndexType = typename MatrixType::IndexType; + using RealAllocatorType = typename MatrixType::RealAllocatorType; + using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >; + + test_AddMatrix< MatrixType, MatrixType2 >(); +} + TYPED_TEST( MatrixTest, assignmentOperatorTest ) { using MatrixType = typename TestFixture::MatrixType;