diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index f86def78ed2f9e7cda6520e25be27181ae0accb0..2f194c76de5c6495ef380c043c3ef3593edcd7e2 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -44,26 +44,43 @@ class CSR /** * \brief Number segments. */ + __cuda_callable__ IndexType getSize() const; + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ IndexType getStorageSize() const; + __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; /*** * \brief Go over all segments and for each segment element call - * function 'f' with arguments 'args' + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + template< typename Function, typename... Args > void forAll( Function& f, Args... args ) const; + /*** * \brief Go over all segments and perform a reduction in each of them. */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args > - void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args ); + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + protected: @@ -75,4 +92,4 @@ class CSR } // namespace Conatiners } // namespace TNL -#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/CSR.hpp> diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ea45b40ba9d67b16203e0e7665859f5390653b9f..84a0fcb346e4fc8df0e9e3fd49e30ba082164fa5 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/Vector.h> -#include <TNL/Algorithms/ParalleFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/Segments/CSR.h> namespace TNL { @@ -43,68 +43,147 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index > + template< typename SizesHolder > +void CSR< Device, Index >:: -void setSegmentsCount( const IndexType& size ) +setSizes( const SizesHolder& sizes ) { - this->offsets.setSize( size + 1 ); + this->offsets.setSize( sizes.getSize() + 1 ); + auto view = this->offsets.getView( 0, sizes.getSize() ); + view = sizes; + this->offsets.setElement( sizes.getSize(), 0 ); + this->offsets.template scan< Algorithms::ScanType::Exclusive >(); } template< typename Device, typename Index > - template< typename SizesHolder = OffsetsHolder > +__cuda_callable__ +Index CSR< Device, Index >:: -void setSizes( const SizesHolder& sizes ) +getSize() const { - this->offsets.setSize( sizes.getSize() + 1 ); - auto view = this->offsets.getView( 0, sizes.getSize() ); - view = sizes; - this->offsets.setElement( sizes.getSize>(), 0 ); - this->offsets.scan< Algorithms::ScanType::Exclusive >(); + return this->offsets.getSize() - 1; } template< typename Device, typename Index > +__cuda_callable__ +Index CSR< Device, Index >:: -Index getSize() const +getSegmentSize( const IndexType segmentIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +#else + return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); +#endif + } + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSR< Device, Index >:: +getStorageSize() const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ this->getSize() ]; +#else + return offsets.getElement( this->getSize() ); +#endif + } + return offsets[ this->getSize() ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSR< Device, Index >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index > +__cuda_callable__ +void +CSR< Device, Index >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { - return this->offsets.getSize() - 1; } template< typename Device, typename Index > template< typename Function, typename... Args > +void CSR< Device, Index >:: -void forAll( Function& f, Args args ) const +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); - auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; for( IndexType j = begin; j < end; j++ ) - f( i, j, args ); + if( ! f( i, j, args... ) ) + break; }; - Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSR< Device, Index >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void CSR< Device, Index >:: -void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args ) +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { const auto offsetsView = this->offsets.getView(); - auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; Real aux( zero ); for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args ) ); + reduction( aux, fetch( i, j, args... ) ); keeper( i, aux ); }; - Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } - +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSR< Device, Index >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) +{ + this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); +} } // namespace Segements } // namespace Conatiners -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 599e5ad335e5d0015f01254b00ec16297ad1a163..a93c7a893fe82da5fbe3b188dd493e03fdbe02a1 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -163,6 +163,7 @@ void Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; + this->values.reset(); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index e266dc66db19ff67e44248a0565a481062989aab..7581ef090bc9bda5c7c5736cefc4797a193d76b5 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -55,9 +55,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > virtual String getSerializationTypeVirtual() const; - void setDimensions( const IndexType rows, - const IndexType columns ); - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; @@ -85,6 +82,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > bool setElement( const IndexType row, const IndexType column, const RealType& value ); + __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, @@ -189,10 +187,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void load( const String& fileName ); void print( std::ostream& str ) const; - + + __cuda_callable__ + IndexType getPaddingIndex() const; protected: - ColumnsVectorType columnsVector; + ColumnsVectorType columnIndexes; + + SegmentsType segments; + + IndexAllocator indexAlloctor; + + RealAllocator realAllocator; }; } // namespace Conatiners diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index abfc1619dbb2aae26d9d4586b0f43dd3a8339a0e..1ccb602ef08467decd7c8c85371a66e57bdeb43f 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -24,7 +24,7 @@ namespace Matrices { SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) - : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator ) + : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) { } @@ -36,7 +36,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix& m ) - : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector ) + : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) { } @@ -48,7 +48,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix&& m ) - : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) ) + : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) { } @@ -63,10 +63,10 @@ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) -: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator ) +: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { } - + template< typename Real, template< typename, typename > class Segments, typename Device, @@ -96,20 +96,6 @@ getSerializationTypeVirtual() const return this->getSerializationType(); } -template< typename Real, - template< typename, typename > class Segments, - typename Device, - typename Index, - typename RealAllocator, - typename IndexAllocator > -void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -setDimensions( const IndexType rows, - const IndexType columns ) -{ - -} - template< typename Real, template< typename, typename > class Segments, typename Device, @@ -120,7 +106,12 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { - + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); + this->segments.setSizes( rowLengths ); + this->values.setSize( this->segments.getStorageSize() ); + this->values = ( RealType ) 0; + this->columnIndexes.setSize( this->segments.getStorageSize() ); + this->columnIndexes = this->getPaddingIndex(); } template< typename Real, @@ -188,7 +179,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { - + Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, @@ -213,7 +204,9 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: reset() { - + Matrix< Real, Device, Index >::reset(); + this->columnIndexes.reset(); + } template< typename Real, @@ -229,7 +222,7 @@ setElementFast( const IndexType row, const IndexType column, const RealType& value ) { - + return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, @@ -244,7 +237,7 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) { - + return this->addElement( row, column, value, 0.0 ); } template< typename Real, @@ -277,7 +270,56 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - + TNL_ASSERT( row >= 0 && row < this->rows && + column >= 0 && column < this->columns, + std::cerr << " row = " << row + << " column = " << column + << " this->rows = " << this->rows + << " this->columns = " << this->columns ); + + const IndexType rowSize = this->segments.getSegmentSize( row ); + IndexType col( this->getPaddingIndex() ); + IndexType i; + IndexType globalIdx; + for( i = 0; i < rowSize; i++ ) + { + globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + { + this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); + return true; + } + if( col == this->getPaddingIndex() || col > column ) + break; + } + if( i == rowSize ) + return false; + if( col == this->getPaddingIndex() ) + { + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } + else + { + IndexType j = rowSize - 1; + while( j > i ) + { + const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); + const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); + TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); + TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); + this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); + this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); + j--; + } + + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } } @@ -377,16 +419,25 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { - + const IndexType rowSize = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowSize; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + const IndexType col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + return this->values.getElement( globalIdx ); + } + return 0.0; } -__cuda_callable__ template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > +__cuda_callable__ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowFast( const IndexType row, @@ -604,6 +655,19 @@ print( std::ostream& str ) const } +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getPaddingIndex() const +{ + return -1; +} } //namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h index dda95e68bb5222bd1094eec059db3f1355b03beb..889d92e627a9c6f456473eeaaaa3cb9d5158f93e 100644 --- a/src/TNL/Matrices/Sparse_impl.h +++ b/src/TNL/Matrices/Sparse_impl.h @@ -75,7 +75,6 @@ template< typename Real, void Sparse< Real, Device, Index >::reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); this->columnIndexes.reset(); } diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 00654de3c10724f60a0123d267b399eaf9eac147..a14148151b7371bcb693b43dc571e8eeb61f969f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -108,7 +108,7 @@ TYPED_TEST( CSRMatrixTest, addElementTest ) test_AddElement< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setRowTest ) +/*TYPED_TEST( CSRMatrixTest, setRowTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -134,7 +134,7 @@ TYPED_TEST( CSRMatrixTest, printTest ) using CSRMatrixType = typename TestFixture::CSRMatrixType; test_Print< CSRMatrixType >(); -} +}*/ #endif