Loading src/TNL/Algorithms/Segments/CSR.h +4 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ namespace TNL { template< typename Device, typename Index, CSRKernelTypes KernelType_ = CSRScalar, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { Loading @@ -36,6 +37,7 @@ class CSR using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; CSRKernelTypes KernelType = KernelType_; CSR(); Loading Loading @@ -114,8 +116,8 @@ class CSR CSR& operator=( const CSR& rhsSegments ) = default; template< typename Device_, typename Index_, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ); template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ); void save( File& file ) const; Loading src/TNL/Algorithms/Segments/CSR.hpp +51 −28 Original line number Diff line number Diff line Loading @@ -22,16 +22,18 @@ namespace TNL { template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR() { } template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const SegmentsSizes& segmentsSizes ) { this->setSegmentsSizes( segmentsSizes ); Loading @@ -39,16 +41,18 @@ CSR( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR& csr ) : offsets( csr.offsets ) { } template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) { Loading @@ -56,9 +60,10 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > String CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; Loading @@ -66,9 +71,10 @@ getSerializationType() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > String CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsType() { return ViewType::getSegmentsType(); Loading @@ -76,10 +82,11 @@ getSegmentsType() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename SizesHolder > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); Loading @@ -87,9 +94,10 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: reset() { this->offsets.setSize( 1 ); Loading @@ -99,9 +107,10 @@ reset() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > typename CSR< Device, Index, IndexAllocator >::ViewType CSR< Device, Index, IndexAllocator >:: typename CSR< Device, Index, KernelType_, IndexAllocator >::ViewType CSR< Device, Index, KernelType_, IndexAllocator >:: getView() { return ViewType( this->offsets.getView() ); Loading @@ -109,9 +118,10 @@ getView() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > auto CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); Loading @@ -119,8 +129,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; Loading @@ -128,8 +139,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); Loading @@ -137,8 +149,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSize() const -> IndexType { return this->getStorageSize(); Loading @@ -146,8 +159,9 @@ getSize() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); Loading @@ -155,8 +169,9 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) Loading @@ -172,10 +187,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); Loading @@ -183,10 +199,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Function, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); Loading @@ -194,10 +211,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator> template< typename Function, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); Loading @@ -205,10 +223,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); Loading @@ -216,10 +235,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); Loading @@ -227,11 +247,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Device_, typename Index_, typename IndexAllocator_ > CSR< Device, Index, IndexAllocator >& CSR< Device, Index, IndexAllocator >:: operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > CSR< Device, Index, KernelType_, IndexAllocator >& CSR< Device, Index, KernelType_, IndexAllocator >:: operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ) { this->offsets = source.offsets; return *this; Loading @@ -239,9 +260,10 @@ operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: save( File& file ) const { file << this->offsets; Loading @@ -249,9 +271,10 @@ save( File& file ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: load( File& file ) { file >> this->offsets; Loading src/TNL/Algorithms/Segments/CSRView.h +6 −2 Original line number Diff line number Diff line Loading @@ -19,8 +19,11 @@ namespace TNL { namespace Algorithms { namespace Segments { enum CSRKernelTypes { CSRScalarKernel, CSRVectorKernel, CSRLightKernel }; template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ = CSRScalar > class CSRView { public: Loading @@ -34,6 +37,7 @@ class CSRView using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; CSRKernelTypes KernelType = KernelType_; __cuda_callable__ CSRView(); Loading src/TNL/Algorithms/Segments/CSRView.hpp +81 −56 Original line number Diff line number Diff line Loading @@ -22,122 +22,136 @@ namespace TNL { template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView() { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView&& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView& csr_view ) : offsets( csr_view.offsets ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView&& csr_view ) : offsets( std::move( csr_view.offsets ) ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > String CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > String CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSegmentsType() { return "CSR"; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ typename CSRView< Device, Index >::ViewType CSRView< Device, Index >:: typename CSRView< Device, Index, KernelType_ >::ViewType CSRView< Device, Index, KernelType_ >:: getView() { return ViewType( this->offsets ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSize() const -> IndexType { return this->getStorageSize(); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) Loading @@ -152,20 +166,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets; Loading @@ -181,24 +197,28 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; const auto offsetsView = this->offsets.getConstView(); if( KernelType == CSRScalar ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsetsView[ segmentIdx ]; const IndexType end = offsetsView[ segmentIdx + 1 ]; Loading @@ -211,21 +231,24 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); } template< typename Device, typename Index > CSRView< Device, Index >& CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > CSRView< Device, Index, KernelType_ >& CSRView< Device, Index, KernelType_ >:: operator=( const CSRView& view ) { this->offsets.bind( view.offsets ); Loading @@ -233,18 +256,20 @@ operator=( const CSRView& view ) } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: save( File& file ) const { file << this->offsets; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: load( File& file ) { file >> this->offsets; Loading Loading
src/TNL/Algorithms/Segments/CSR.h +4 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ namespace TNL { template< typename Device, typename Index, CSRKernelTypes KernelType_ = CSRScalar, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { Loading @@ -36,6 +37,7 @@ class CSR using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; CSRKernelTypes KernelType = KernelType_; CSR(); Loading Loading @@ -114,8 +116,8 @@ class CSR CSR& operator=( const CSR& rhsSegments ) = default; template< typename Device_, typename Index_, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ); template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ); void save( File& file ) const; Loading
src/TNL/Algorithms/Segments/CSR.hpp +51 −28 Original line number Diff line number Diff line Loading @@ -22,16 +22,18 @@ namespace TNL { template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR() { } template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const SegmentsSizes& segmentsSizes ) { this->setSegmentsSizes( segmentsSizes ); Loading @@ -39,16 +41,18 @@ CSR( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR& csr ) : offsets( csr.offsets ) { } template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) { Loading @@ -56,9 +60,10 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > String CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; Loading @@ -66,9 +71,10 @@ getSerializationType() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > String CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsType() { return ViewType::getSegmentsType(); Loading @@ -76,10 +82,11 @@ getSegmentsType() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename SizesHolder > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); Loading @@ -87,9 +94,10 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: reset() { this->offsets.setSize( 1 ); Loading @@ -99,9 +107,10 @@ reset() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > typename CSR< Device, Index, IndexAllocator >::ViewType CSR< Device, Index, IndexAllocator >:: typename CSR< Device, Index, KernelType_, IndexAllocator >::ViewType CSR< Device, Index, KernelType_, IndexAllocator >:: getView() { return ViewType( this->offsets.getView() ); Loading @@ -109,9 +118,10 @@ getView() template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > auto CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); Loading @@ -119,8 +129,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; Loading @@ -128,8 +139,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); Loading @@ -137,8 +149,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSize() const -> IndexType { return this->getStorageSize(); Loading @@ -146,8 +159,9 @@ getSize() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); Loading @@ -155,8 +169,9 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: __cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) Loading @@ -172,10 +187,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); Loading @@ -183,10 +199,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Function, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); Loading @@ -194,10 +211,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator> template< typename Function, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); Loading @@ -205,10 +223,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); Loading @@ -216,10 +235,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); Loading @@ -227,11 +247,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Device_, typename Index_, typename IndexAllocator_ > CSR< Device, Index, IndexAllocator >& CSR< Device, Index, IndexAllocator >:: operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > CSR< Device, Index, KernelType_, IndexAllocator >& CSR< Device, Index, KernelType_, IndexAllocator >:: operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ) { this->offsets = source.offsets; return *this; Loading @@ -239,9 +260,10 @@ operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: save( File& file ) const { file << this->offsets; Loading @@ -249,9 +271,10 @@ save( File& file ) const template< typename Device, typename Index, CSRKernelTypes KernelType_, typename IndexAllocator > void CSR< Device, Index, IndexAllocator >:: CSR< Device, Index, KernelType_, IndexAllocator >:: load( File& file ) { file >> this->offsets; Loading
src/TNL/Algorithms/Segments/CSRView.h +6 −2 Original line number Diff line number Diff line Loading @@ -19,8 +19,11 @@ namespace TNL { namespace Algorithms { namespace Segments { enum CSRKernelTypes { CSRScalarKernel, CSRVectorKernel, CSRLightKernel }; template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ = CSRScalar > class CSRView { public: Loading @@ -34,6 +37,7 @@ class CSRView using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; CSRKernelTypes KernelType = KernelType_; __cuda_callable__ CSRView(); Loading
src/TNL/Algorithms/Segments/CSRView.hpp +81 −56 Original line number Diff line number Diff line Loading @@ -22,122 +22,136 @@ namespace TNL { template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView() { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView&& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView& csr_view ) : offsets( csr_view.offsets ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView&& csr_view ) : offsets( std::move( csr_view.offsets ) ) { } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > String CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > String CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSegmentsType() { return "CSR"; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ typename CSRView< Device, Index >::ViewType CSRView< Device, Index >:: typename CSRView< Device, Index, KernelType_ >::ViewType CSRView< Device, Index, KernelType_ >:: getView() { return ViewType( this->offsets ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSize() const -> IndexType { return this->getStorageSize(); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); } template< typename Device, typename Index > __cuda_callable__ auto CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) Loading @@ -152,20 +166,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > __cuda_callable__ auto CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets; Loading @@ -181,24 +197,28 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; const auto offsetsView = this->offsets.getConstView(); if( KernelType == CSRScalar ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsetsView[ segmentIdx ]; const IndexType end = offsetsView[ segmentIdx + 1 ]; Loading @@ -211,21 +231,24 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); } template< typename Device, typename Index > CSRView< Device, Index >& CSRView< Device, Index >:: typename Index, CSRKernelTypes KernelType_ > CSRView< Device, Index, KernelType_ >& CSRView< Device, Index, KernelType_ >:: operator=( const CSRView& view ) { this->offsets.bind( view.offsets ); Loading @@ -233,18 +256,20 @@ operator=( const CSRView& view ) } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: save( File& file ) const { file << this->offsets; } template< typename Device, typename Index > typename Index, CSRKernelTypes KernelType_ > void CSRView< Device, Index >:: CSRView< Device, Index, KernelType_ >:: load( File& file ) { file >> this->offsets; Loading