Loading src/TNL/Containers/Segments/CSR.h +21 −4 Original line number Diff line number Diff line Loading @@ -44,26 +44,43 @@ class CSR /** * \brief Number segments. */ __cuda_callable__ IndexType getSize() const; __cuda_callable__ IndexType getSegmentSize( const IndexType segmentIdx ) const; __cuda_callable__ IndexType getStorageSize() const; __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args' * function 'f' with arguments 'args'. The return type of 'f' is bool. * When its true, the for-loop continues. Once 'f' returns false, the for-loop * is terminated. */ template< typename Function, typename... Args > void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; template< typename Function, typename... Args > void forAll( Function& f, Args... args ) const; /*** * \brief Go over all segments and perform a reduction in each of them. */ template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args > void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args ); template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); protected: Loading @@ -75,4 +92,4 @@ class CSR } // namespace Conatiners } // namespace TNL #include <TNL/Containers/Segments/CSR.h> #include <TNL/Containers/Segments/CSR.hpp> src/TNL/Containers/Segments/CSR.hpp +101 −22 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/ParalleFor.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/Segments/CSR.h> namespace TNL { Loading Loading @@ -43,68 +43,147 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index > template< typename SizesHolder > void CSR< Device, Index >:: void setSegmentsCount( const IndexType& size ) setSizes( const SizesHolder& sizes ) { this->offsets.setSize( size + 1 ); this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize(), 0 ); this->offsets.template scan< Algorithms::ScanType::Exclusive >(); } template< typename Device, typename Index > template< typename SizesHolder = OffsetsHolder > __cuda_callable__ Index CSR< Device, Index >:: void setSizes( const SizesHolder& sizes ) getSize() const { this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize>(), 0 ); this->offsets.scan< Algorithms::ScanType::Exclusive >(); return this->offsets.getSize() - 1; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: Index getSize() const getSegmentSize( const IndexType segmentIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; #else return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); #endif } return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: getStorageSize() const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ this->getSize() ]; #else return offsets.getElement( this->getSize() ); #endif } return offsets[ this->getSize() ]; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ segmentIdx ] + localIdx; #else return offsets.getElement( segmentIdx ) + localIdx; #endif } return offsets[ segmentIdx ] + localIdx; } template< typename Device, typename Index > __cuda_callable__ void CSR< Device, Index >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { return this->offsets.getSize() - 1; } template< typename Device, typename Index > template< typename Function, typename... Args > void CSR< Device, Index >:: void forAll( Function& f, Args args ) const forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; for( IndexType j = begin; j < end; j++ ) f( i, j, args ); if( ! f( i, j, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } template< typename Device, typename Index > template< typename Function, typename... Args > void CSR< Device, Index >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args ) segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { const auto offsetsView = this->offsets.getView(); auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; Real aux( zero ); for( IndexType j = begin; j < end; j++ ) reduction( aux, fetch( i, j, args ) ); reduction( aux, fetch( i, j, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); } } // namespace Segements } // namespace Conatiners } // namespace TNL src/TNL/Matrices/Matrix_impl.h +1 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,7 @@ void Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; this->values.reset(); } template< typename Real, Loading src/TNL/Matrices/SparseMatrix.h +11 −5 Original line number Diff line number Diff line Loading @@ -55,9 +55,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > virtual String getSerializationTypeVirtual() const; void setDimensions( const IndexType rows, const IndexType columns ); void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; Loading Loading @@ -85,6 +82,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > bool setElement( const IndexType row, const IndexType column, const RealType& value ); __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, Loading Loading @@ -190,9 +188,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void print( std::ostream& str ) const; __cuda_callable__ IndexType getPaddingIndex() const; protected: ColumnsVectorType columnsVector; ColumnsVectorType columnIndexes; SegmentsType segments; IndexAllocator indexAlloctor; RealAllocator realAllocator; }; } // namespace Conatiners Loading src/TNL/Matrices/SparseMatrix.hpp +91 −27 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ namespace Matrices { SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) { } Loading @@ -36,7 +36,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix& m ) : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector ) : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) { } Loading @@ -48,7 +48,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix&& m ) : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) ) : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) { } Loading @@ -63,7 +63,7 @@ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { } Loading Loading @@ -96,20 +96,6 @@ getSerializationTypeVirtual() const return this->getSerializationType(); } template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { } template< typename Real, template< typename, typename > class Segments, typename Device, Loading @@ -120,7 +106,12 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); this->segments.setSizes( rowLengths ); this->values.setSize( this->segments.getStorageSize() ); this->values = ( RealType ) 0; this->columnIndexes.setSize( this->segments.getStorageSize() ); this->columnIndexes = this->getPaddingIndex(); } template< typename Real, Loading Loading @@ -188,7 +179,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, Loading @@ -213,6 +204,8 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); this->columnIndexes.reset(); } Loading @@ -229,7 +222,7 @@ setElementFast( const IndexType row, const IndexType column, const RealType& value ) { return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, Loading @@ -244,7 +237,7 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) { return this->addElement( row, column, value, 0.0 ); } template< typename Real, Loading Loading @@ -277,7 +270,56 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { TNL_ASSERT( row >= 0 && row < this->rows && column >= 0 && column < this->columns, std::cerr << " row = " << row << " column = " << column << " this->rows = " << this->rows << " this->columns = " << this->columns ); const IndexType rowSize = this->segments.getSegmentSize( row ); IndexType col( this->getPaddingIndex() ); IndexType i; IndexType globalIdx; for( i = 0; i < rowSize; i++ ) { globalIdx = this->segments.getGlobalIndex( row, i ); TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); col = this->columnIndexes.getElement( globalIdx ); if( col == column ) { this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); return true; } if( col == this->getPaddingIndex() || col > column ) break; } if( i == rowSize ) return false; if( col == this->getPaddingIndex() ) { this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); return true; } else { IndexType j = rowSize - 1; while( j > i ) { const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); j--; } this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); return true; } } Loading Loading @@ -377,16 +419,25 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { const IndexType rowSize = this->segments.getSegmentSize( row ); for( IndexType i = 0; i < rowSize; i++ ) { const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); const IndexType col = this->columnIndexes.getElement( globalIdx ); if( col == column ) return this->values.getElement( globalIdx ); } return 0.0; } __cuda_callable__ template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > __cuda_callable__ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowFast( const IndexType row, Loading Loading @@ -604,6 +655,19 @@ print( std::ostream& str ) const } template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return -1; } } //namespace Matrices } // namespace TNL Loading
src/TNL/Containers/Segments/CSR.h +21 −4 Original line number Diff line number Diff line Loading @@ -44,26 +44,43 @@ class CSR /** * \brief Number segments. */ __cuda_callable__ IndexType getSize() const; __cuda_callable__ IndexType getSegmentSize( const IndexType segmentIdx ) const; __cuda_callable__ IndexType getStorageSize() const; __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args' * function 'f' with arguments 'args'. The return type of 'f' is bool. * When its true, the for-loop continues. Once 'f' returns false, the for-loop * is terminated. */ template< typename Function, typename... Args > void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; template< typename Function, typename... Args > void forAll( Function& f, Args... args ) const; /*** * \brief Go over all segments and perform a reduction in each of them. */ template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args > void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args ); template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); protected: Loading @@ -75,4 +92,4 @@ class CSR } // namespace Conatiners } // namespace TNL #include <TNL/Containers/Segments/CSR.h> #include <TNL/Containers/Segments/CSR.hpp>
src/TNL/Containers/Segments/CSR.hpp +101 −22 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/ParalleFor.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/Segments/CSR.h> namespace TNL { Loading Loading @@ -43,68 +43,147 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index > template< typename SizesHolder > void CSR< Device, Index >:: void setSegmentsCount( const IndexType& size ) setSizes( const SizesHolder& sizes ) { this->offsets.setSize( size + 1 ); this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize(), 0 ); this->offsets.template scan< Algorithms::ScanType::Exclusive >(); } template< typename Device, typename Index > template< typename SizesHolder = OffsetsHolder > __cuda_callable__ Index CSR< Device, Index >:: void setSizes( const SizesHolder& sizes ) getSize() const { this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize>(), 0 ); this->offsets.scan< Algorithms::ScanType::Exclusive >(); return this->offsets.getSize() - 1; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: Index getSize() const getSegmentSize( const IndexType segmentIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; #else return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); #endif } return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: getStorageSize() const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ this->getSize() ]; #else return offsets.getElement( this->getSize() ); #endif } return offsets[ this->getSize() ]; } template< typename Device, typename Index > __cuda_callable__ Index CSR< Device, Index >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ return offsets[ segmentIdx ] + localIdx; #else return offsets.getElement( segmentIdx ) + localIdx; #endif } return offsets[ segmentIdx ] + localIdx; } template< typename Device, typename Index > __cuda_callable__ void CSR< Device, Index >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { return this->offsets.getSize() - 1; } template< typename Device, typename Index > template< typename Function, typename... Args > void CSR< Device, Index >:: void forAll( Function& f, Args args ) const forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; for( IndexType j = begin; j < end; j++ ) f( i, j, args ); if( ! f( i, j, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } template< typename Device, typename Index > template< typename Function, typename... Args > void CSR< Device, Index >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args ) segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { const auto offsetsView = this->offsets.getView(); auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; Real aux( zero ); for( IndexType j = begin; j < end; j++ ) reduction( aux, fetch( i, j, args ) ); reduction( aux, fetch( i, j, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); } } // namespace Segements } // namespace Conatiners } // namespace TNL
src/TNL/Matrices/Matrix_impl.h +1 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,7 @@ void Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; this->values.reset(); } template< typename Real, Loading
src/TNL/Matrices/SparseMatrix.h +11 −5 Original line number Diff line number Diff line Loading @@ -55,9 +55,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > virtual String getSerializationTypeVirtual() const; void setDimensions( const IndexType rows, const IndexType columns ); void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; Loading Loading @@ -85,6 +82,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > bool setElement( const IndexType row, const IndexType column, const RealType& value ); __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, Loading Loading @@ -190,9 +188,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void print( std::ostream& str ) const; __cuda_callable__ IndexType getPaddingIndex() const; protected: ColumnsVectorType columnsVector; ColumnsVectorType columnIndexes; SegmentsType segments; IndexAllocator indexAlloctor; RealAllocator realAllocator; }; } // namespace Conatiners Loading
src/TNL/Matrices/SparseMatrix.hpp +91 −27 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ namespace Matrices { SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) { } Loading @@ -36,7 +36,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix& m ) : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector ) : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) { } Loading @@ -48,7 +48,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix&& m ) : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) ) : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) { } Loading @@ -63,7 +63,7 @@ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { } Loading Loading @@ -96,20 +96,6 @@ getSerializationTypeVirtual() const return this->getSerializationType(); } template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { } template< typename Real, template< typename, typename > class Segments, typename Device, Loading @@ -120,7 +106,12 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); this->segments.setSizes( rowLengths ); this->values.setSize( this->segments.getStorageSize() ); this->values = ( RealType ) 0; this->columnIndexes.setSize( this->segments.getStorageSize() ); this->columnIndexes = this->getPaddingIndex(); } template< typename Real, Loading Loading @@ -188,7 +179,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, Loading @@ -213,6 +204,8 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); this->columnIndexes.reset(); } Loading @@ -229,7 +222,7 @@ setElementFast( const IndexType row, const IndexType column, const RealType& value ) { return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, Loading @@ -244,7 +237,7 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) { return this->addElement( row, column, value, 0.0 ); } template< typename Real, Loading Loading @@ -277,7 +270,56 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { TNL_ASSERT( row >= 0 && row < this->rows && column >= 0 && column < this->columns, std::cerr << " row = " << row << " column = " << column << " this->rows = " << this->rows << " this->columns = " << this->columns ); const IndexType rowSize = this->segments.getSegmentSize( row ); IndexType col( this->getPaddingIndex() ); IndexType i; IndexType globalIdx; for( i = 0; i < rowSize; i++ ) { globalIdx = this->segments.getGlobalIndex( row, i ); TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); col = this->columnIndexes.getElement( globalIdx ); if( col == column ) { this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); return true; } if( col == this->getPaddingIndex() || col > column ) break; } if( i == rowSize ) return false; if( col == this->getPaddingIndex() ) { this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); return true; } else { IndexType j = rowSize - 1; while( j > i ) { const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); j--; } this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); return true; } } Loading Loading @@ -377,16 +419,25 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { const IndexType rowSize = this->segments.getSegmentSize( row ); for( IndexType i = 0; i < rowSize; i++ ) { const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); const IndexType col = this->columnIndexes.getElement( globalIdx ); if( col == column ) return this->values.getElement( globalIdx ); } return 0.0; } __cuda_callable__ template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > __cuda_callable__ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowFast( const IndexType row, Loading Loading @@ -604,6 +655,19 @@ print( std::ostream& str ) const } template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return -1; } } //namespace Matrices } // namespace TNL