diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index a6acb52fd0d6ff86985df379205b89de5874d263..66f4fb236a6d10fe5e777278d37a749cfe10d16a 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -45,14 +45,14 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; template< typename Real, typename Device, typename Index > using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >; -template< typename Device, typename Index > -using EllpackSegments = Containers::Segments::Ellpack< Device, Index >; +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >; -template< typename Device, typename Index > -using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index >; +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >; diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index ecd1de983ce309be9f2ea20575e45a9267b6b261..b83e43f1d146091219e3948afdbb94bfa0ae0b4e 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -10,22 +10,28 @@ #pragma once +#include <type_traits> + #include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/CSRView.h> namespace TNL { namespace Containers { namespace Segments { template< typename Device, - typename Index > + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > 
class CSR { public: using DeviceType = Device; using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; using SegmentsSizes = OffsetsHolder; + using ViewType = CSRView< Device, Index >; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; CSR(); @@ -41,6 +47,10 @@ class CSR template< typename SizesHolder = OffsetsHolder > void setSegmentsSizes( const SizesHolder& sizes ); + ViewType getView(); + + ConstViewType getConstView() const; + /** * \brief Number segments. */ diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ccb483125d423edde3237e8a69b3e24b83a3334e..a8f12e7dc3e3db1504a54a045e92b8cb5f0cbbfc 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -13,6 +13,7 @@ #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/details/CSR.h> namespace TNL { namespace Containers { @@ -20,64 +21,92 @@ namespace TNL { template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR() { } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const SegmentsSizes& segmentsSizes ) { this->setSegmentsSizes( segmentsSizes ); } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const CSR& csr ) : offsets( csr.offsets ) { } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const CSR&& csr ) : 
offsets( std::move( csr.offsets ) ) { } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename SizesHolder > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { - this->offsets.setSize( sizes.getSize() + 1 ); + details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); + /*this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize(), 0 ); - this->offsets.template scan< Algorithms::ScanType::Exclusive >(); + this->offsets.template scan< Algorithms::ScanType::Exclusive >();*/ } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ViewType +CSR< Device, Index, IndexAllocator >:: +getView() +{ + return ViewType( this->offsets.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ConstViewType +CSR< Device, Index, IndexAllocator >:: +getConstView() const +{ + return ConstViewType( this->offsets.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentsCount() const { return this->offsets.getSize() - 1; } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const { if( ! 
std::is_same< DeviceType, Devices::Host >::value ) @@ -92,20 +121,22 @@ getSegmentSize( const IndexType segmentIdx ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSize() const { return this->getStorageSize(); } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getStorageSize() const { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -120,10 +151,11 @@ getStorageSize() const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -138,19 +170,21 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Function, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getConstView(); @@ -166,20 +200,22 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator> template< typename Function, typename... 
Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... ); /* forSegments takes a range of segment indices, not of elements */ } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -196,28 +232,31 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: save( File& file ) const { file << this->offsets; } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: load( File& file ) { file >> this->offsets; diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 5eeb7ecb3efafb5258f9100505874df4f8322538..2f89579702b543cf76f47d32e20bebf2c497828c 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -10,6 +10,8 @@ #pragma once +#include <type_traits> + #include <TNL/Containers/Vector.h> namespace TNL { @@ -24,13 +26,19 @@ class CSRView using DeviceType = Device; using IndexType = Index; - using OffsetsHolderView = typedef Containers::Vector< IndexType, DeviceType, IndexType >::ViewType; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType; + using ViewType = CSRView; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; __cuda_callable__ CSRView(); __cuda_callable__ - CSRView( const OffsetsHolderView& offsets ); + CSRView( const OffsetsView&& offsets ); + + __cuda_callable__ + CSRView( const ConstOffsetsView&& offsets ); __cuda_callable__ CSRView( const CSRView& csr_view ); @@ -38,6 +46,10 @@ class CSRView __cuda_callable__ CSRView( const CSRView&& csr_view ); + ViewType getView(); + + ConstViewType getConstView() const; + /** * \brief Number segments. 
*/ @@ -96,7 +108,7 @@ class CSRView protected: - OffsetsHolderView offsets; + OffsetsView offsets; }; } // namespace Segements } // namespace Conatiners diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 30ed24071e7b42b8cd27cf959e890366682319ed..f50a74985f050f18b5913008636552e1bbf4f760 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -31,7 +31,7 @@ template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: -CSRView( const OffsetsHolderView& offsets_view ) +CSRView( const OffsetsView&& offsets_view ) : offsets( offsets_view ) { } @@ -40,10 +40,18 @@ template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: -CSRView( const CSRView& csr_view ) - : offsets( csr_view.offsest ) +CSRView( const ConstOffsetsView&& offsets_view ) + : offsets( offsets_view ) { +} +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView& csr_view ) + : offsets( csr_view.offsets ) +{ } template< typename Device, @@ -51,9 +59,26 @@ template< typename Device, __cuda_callable__ CSRView< Device, Index >:: CSRView( const CSRView&& csr_view ) - : offsets( std::move( csr_view.offsest ) ) + : offsets( std::move( csr_view.offsets ) ) +{ +} + +template< typename Device, + typename Index > +typename CSRView< Device, Index >::ViewType +CSRView< Device, Index >:: +getView() { + return ViewType( this->offsets ); +} +template< typename Device, + typename Index > +typename CSRView< Device, Index >::ConstViewType +CSRView< Device, Index >:: +getConstView() const +{ + return ConstViewType( this->offsets.getConstView() ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index b08ad0f04f9d316f6e2ce62aea5d8990c1204978..9c81a84281925b2ec971bceba5a161aa464c83e6 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ 
b/src/TNL/Containers/Segments/Ellpack.h @@ -11,6 +11,7 @@ #pragma once #include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/EllpackView.h> namespace TNL { namespace Containers { @@ -18,6 +19,7 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int Alignment = 32 > class Ellpack @@ -30,6 +32,9 @@ class Ellpack static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; + using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; + //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; + Ellpack(); @@ -41,6 +46,10 @@ class Ellpack Ellpack( const Ellpack&& segments ); + ViewType getView(); + + //ConstViewType getConstView() const; + /** * \brief Set sizes of particular segments. 
*/ diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 337009e99a7b0a3493b5015190c5d915ea6714ce..482c87d4f36274edf8aef622f576ce07773df3b9 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -21,9 +21,10 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack() : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -31,9 +32,10 @@ Ellpack() template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const SegmentsSizes& segmentsSizes ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -42,9 +44,10 @@ Ellpack( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -53,9 +56,10 @@ Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const Ellpack& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -63,9 +67,10 @@ Ellpack( const Ellpack& ellpack ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< 
Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const Ellpack&& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -73,11 +78,35 @@ Ellpack( const Ellpack&& ellpack ) template< typename Device, typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +/*template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ConstViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +}*/ + +template< typename Device, + typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename SizesHolder > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); @@ -90,10 +119,11 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) { this->segmentSize = segmentSize; @@ -107,11 +137,12 @@ setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, 
Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentsCount() const { return this->size; @@ -119,11 +150,12 @@ getSegmentsCount() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const { return this->segmentSize; @@ -131,11 +163,12 @@ getSegmentSize( const IndexType segmentIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSize() const { return this->size * this->segmentSize; @@ -144,11 +177,12 @@ getSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getStorageSize() const { return this->alignedSize * this->segmentSize; @@ -156,11 +190,12 @@ getStorageSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( RowMajorOrder ) @@ -171,22 +206,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, 
Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { if( RowMajorOrder ) @@ -220,11 +257,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... ); /* forSegments takes a range of segment indices; getSize() is size * segmentSize */ @@ -232,11 +270,12 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { if( RowMajorOrder ) @@ -272,11 +311,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); @@ -284,10 +324,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: save( File& file ) const { file.save( &segmentSize ); @@ -297,10 +338,11 @@ save( File& file ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: load( File& file ) { file.load( &segmentSize ); diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h new file mode 100644 index 0000000000000000000000000000000000000000..adbfee629c03d9ff49c572781cabc4a95c0ee0ba --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -0,0 +1,111 @@ +/*************************************************************************** + EllpackView.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> + + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment 
= 32 > +class EllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + using ViewType = EllpackView; + //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; + + __cuda_callable__ + EllpackView(); + + __cuda_callable__ + EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ); + + __cuda_callable__ + EllpackView( const EllpackView& ellpackView ); + + __cuda_callable__ + EllpackView( const EllpackView&& ellpackView ); + + ViewType getView(); + + //ConstViewType getConstView() const; + + /** + * \brief Number of segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it is true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/EllpackView.hpp> diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d124633ff68f1d64230d754a6d81dd87a6fc0117 --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -0,0 +1,293 @@ +/*************************************************************************** + EllpackView.hpp - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/EllpackView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView() + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ) 
+ : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( const EllpackView& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( const EllpackView&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +/*template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +}*/ + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentsCount() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSize() const +{ + return 
this->size * this->segmentSize; +} + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getStorageSize() const +{ + return this->alignedSize * this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType begin = segmentIdx; + const IndexType end = storageSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); /* forSegments takes a range of segment indices; getSize() is size * segmentSize */ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + if( RowMajorOrder ) + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j += alignedSize ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +save( File& file ) const +{ + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +load( File& file ) +{ + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index ecc2c8c7ef1d8fa24d418372c07c2f769ab75cc9..fc514c51f3edcff69682e35378515c9d90dc8ffc 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -11,6 +11,7 @@ #pragma once #include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/SlicedEllpackView.h> namespace TNL { namespace Containers { @@ -18,6 +19,7 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template 
Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int SliceSize = 32 > class SlicedEllpack @@ -26,9 +28,11 @@ class SlicedEllpack using DeviceType = Device; using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr int getSliceSize() { return SliceSize; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; SlicedEllpack(); @@ -38,6 +42,10 @@ class SlicedEllpack SlicedEllpack( const SlicedEllpack&& segments ); + ViewType getView(); + + ConstViewType getConstView() const; + /** * \brief Set sizes of particular segments. */ diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index d721edb00b2119bce445a8ec6c3929414f5cd262..bdf28ff73b46de6e9423eebcd03849d7ef5cca2b 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -22,9 +22,10 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -32,9 +33,10 @@ SlicedEllpack() template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) : size( 0 ), alignedSize( 0 ), 
segmentsCount( 0 ) { @@ -43,9 +45,10 @@ SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -55,9 +58,10 @@ SlicedEllpack( const SlicedEllpack& slicedEllpack ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack&& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -67,11 +71,36 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack ) template< typename Device, typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getView() +{ + return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ConstViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getConstView() const +{ + return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), 
sliceSegmentSizes.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename SizesHolder > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentsCount = sizes.getSize(); @@ -79,7 +108,7 @@ setSegmentsSizes( const SizesHolder& sizes ) this->sliceOffsets.setSize( slicesCount + 1 ); this->sliceOffsets = 0; this->sliceSegmentSizes.setSize( slicesCount ); - Ellpack< DeviceType, IndexType, true > ellpack; + Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack; ellpack.setSegmentsSizes( slicesCount, SliceSize ); const IndexType _size = sizes.getSize(); @@ -106,11 +135,12 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentsCount() const { return this->segmentsCount; @@ -118,11 +148,12 @@ getSegmentsCount() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentSize( const IndexType segmentIdx ) const { const Index sliceIdx = segmentIdx / SliceSize; @@ -140,11 +171,12 @@ getSegmentSize( const IndexType segmentIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSize() const { return this->size; @@ -152,11 +184,12 
@@ getSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getStorageSize() const { return this->alignedSize; @@ -164,11 +197,12 @@ getStorageSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -197,22 +231,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); @@ -251,11 +287,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Function, typename... 
Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -263,11 +300,12 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -307,11 +345,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -319,10 +358,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: save( File& file ) const { file.save( &size ); @@ -334,10 +374,11 @@ save( File& file ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: load( File& file ) { file.load( &size );
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h
new file mode 100644
index 0000000000000000000000000000000000000000..275baacf5f5cdc2f367bee8ece316a4106f47ef8
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.h
@@ -0,0 +1,116 @@
+/***************************************************************************
+                          SlicedEllpackView.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <type_traits>
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value,
+          int SliceSize = 32 >
+class SlicedEllpackView   // view counterpart of SlicedEllpack: holds VectorViews instead of Vectors
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >;
+      static constexpr int getSliceSize() { return SliceSize; }
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; }
+      using ViewType = SlicedEllpackView;
+      using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >;   // forward the layout parameters so the const view indexes like this view
+
+      __cuda_callable__
+      SlicedEllpackView();
+
+      __cuda_callable__
+      SlicedEllpackView( IndexType size,
+                         IndexType alignedSize,
+                         IndexType segmentsCount,
+                         OffsetsView&& sliceOffsets,
+                         OffsetsView&& sliceSegmentSizes );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView& slicedEllpackView );
+
+      __cuda_callable__
+      SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView );
+
+      ViewType getView();
+
+      ConstViewType getConstView() const;
+
+      __cuda_callable__
+      IndexType getSegmentsCount() const;
+
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Returns the stored \e size member; the number of segments is returned by getSegmentsCount().
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When it is true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args...
args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      void save( File& file ) const;
+
+      void load( File& file );
+
+   protected:
+
+      IndexType size, alignedSize, segmentsCount;
+
+      OffsetsView sliceOffsets, sliceSegmentSizes;   // one entry per slice; sliceOffsets is also read at [ sliceIdx + 1 ] by the column-major loops
+};
+
+      } // namespace Segments
+   } // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f2e03bd38aab3de36e32feb929202b2ede2b8c34
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -0,0 +1,342 @@
+/***************************************************************************
+                          SlicedEllpackView.hpp -  description
+                             -------------------
+    begin                : Dec 4, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView()   // empty view: all counters zero, offset views default-constructed
+   : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( IndexType size,
+                   IndexType alignedSize,
+                   IndexType segmentsCount,
+                   OffsetsView&& sliceOffsets,
+                   OffsetsView&& sliceSegmentSizes )   // the two views bind to rvalues only - callers must pass temporaries
+   : size( size ), alignedSize( alignedSize ), segmentsCount( segmentsCount ),
+     sliceOffsets( std::forward< OffsetsView >( sliceOffsets ) ), sliceSegmentSizes( std::forward< OffsetsView >( sliceSegmentSizes ) )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView& slicedEllpackView )
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView )   // const rvalue: members are copied exactly as in the copy constructor
+   : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+     segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+     sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getView()
+{
+   return ViewType( size, alignedSize, segmentsCount, OffsetsView( sliceOffsets ), OffsetsView( sliceSegmentSizes ) );   // temporaries: the constructor takes OffsetsView&&, member lvalues cannot bind to it
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getConstView() const
+{
+   return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() );
+}
+
+template< typename Device,
+          typename Index,
+          bool
RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsCount() const
+{
+   return this->segmentsCount;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const   // every segment of a slice shares one size, stored per slice
+{
+   const Index sliceIdx = segmentIdx / SliceSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+      return this->sliceSegmentSizes[ sliceIdx ];
+   else
+   {
+#ifdef __CUDA_ARCH__
+      return this->sliceSegmentSizes[ sliceIdx ];   // compiled for the device: direct element access
+#else
+      return this->sliceSegmentSizes.getElement( sliceIdx );   // host pass with a non-host device: go through getElement()
+#endif
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSize() const
+{
+   return this->size;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+   return this->alignedSize;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const   // storage index of element localIdx of segment segmentIdx
+{
+   const IndexType sliceIdx = segmentIdx / SliceSize;
+   const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+   IndexType sliceOffset, segmentSize;
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      sliceOffset = this->sliceOffsets[ sliceIdx ];
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+   }
+   else
+   {
+#ifdef __CUDA_ARCH__
+      sliceOffset = this->sliceOffsets[ sliceIdx ];   // same guard as getSegmentSize(); the macro name was misspelled before, so this branch was never compiled
+      segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+      sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+      segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+   }
+   if( RowMajorOrder )
+      return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
+   else
+      return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+__cuda_callable__
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{   // TODO: not implemented - segmentIdx and localIdx are left unmodified
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const   // calls f( segmentIdx, localIdx, globalIdx, args... ) until it returns false
+{
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();   // local copies of the views for by-value capture
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {   // mutable: the captured copy of f may have a non-const operator()
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
+         const IndexType end = begin + segmentSize;
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx++  )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {   // mutable: see the row-major branch
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];   // not used in this branch
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];   // start of the next slice bounds this one
+         IndexType localIdx( 0 );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
+            if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
+               break;
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Function, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSegmentsCount(), f, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
+   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
+   if( RowMajorOrder )
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args...
args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;   // row-major: the segment is contiguous
+         const IndexType end = begin + segmentSize;
+         RealType aux( zero );   // per-segment accumulator, seeded with zero
+         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );   // hand the result of this segment to the caller
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+   else
+   {
+      auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+         const IndexType sliceIdx = segmentIdx / SliceSize;
+         const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+         const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];   // not used in this branch
+         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;   // column-major: elements are SliceSize apart
+         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];   // start of the next slice bounds this one
+         RealType aux( zero );
+         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
+            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
+         keeper( segmentIdx, aux );
+      };
+      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   }
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+save( File& file ) const   // writes the three counters, then both offset views; load() reads in the same order
+{
+   file.save( &size );
+   file.save( &alignedSize );
+   file.save( &segmentsCount );
+   file << this->sliceOffsets;
+   file << this->sliceSegmentSizes;
+}
+
+template< typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          int SliceSize >
+void
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+load( File& file )   // NOTE(review): reads into views, which presumably must already refer to storage of the right size - confirm
+{
+   file.load( &size );
+   file.load( &alignedSize );
+   file.load( &segmentsCount );
+   file >> this->sliceOffsets;
+   file >> this->sliceSegmentSizes;
+}
+
+      } // namespace Segments
+   } // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h
new file mode 100644
index 0000000000000000000000000000000000000000..47e768d289cb307957c117402a4a45ce8cd54c7e
--- /dev/null
+++ b/src/TNL/Containers/Segments/details/CSR.h
@@ -0,0 +1,89 @@
+/***************************************************************************
+                          CSR.h -  description
+                             -------------------
+    begin                : Dec 12, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+         namespace details {
+
+template< typename Device,
+          typename Index >
+class CSR
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+
+      template< typename SizesHolder, typename CSROffsets >
+      static void setSegmentsSizes( const SizesHolder& sizes, CSROffsets& offsets )   // turns per-segment sizes into offsets
+      {
+         offsets.setSize( sizes.getSize() + 1 );   // one extra entry after the last segment
+         auto view = offsets.getView( 0, sizes.getSize() );
+         view = sizes;   // copy the sizes into the leading entries
+         offsets.setElement( sizes.getSize(), 0 );
+         offsets.template scan< Algorithms::ScanType::Exclusive >();   // exclusive scan over all entries, in place
+      }
+
+      /***
+ * \brief Returns size of the segment number \r segmentIdx + */ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that needs to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; +}; + } // namespace details + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include <TNL/Containers/Segments/CSR.hpp> diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..b08ad0f04f9d316f6e2ce62aea5d8990c1204978 --- /dev/null +++ b/src/TNL/Containers/Segments/details/Ellpack.h @@ -0,0 +1,107 @@ +/*************************************************************************** + Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class Ellpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + + Ellpack(); + + Ellpack( const SegmentsSizes& sizes ); + + Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); + + Ellpack( const Ellpack& segments ); + + Ellpack( const Ellpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); + /** + * \brief Number segments. 
+ */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/Ellpack.hpp> diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..ecc2c8c7ef1d8fa24d418372c07c2f769ab75cc9 --- /dev/null +++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h @@ -0,0 +1,106 @@ +/*************************************************************************** + SlicedEllpack.h - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + + SlicedEllpack(); + + SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + SlicedEllpack( const SlicedEllpack& segments ); + + SlicedEllpack( const SlicedEllpack&& segments ); + + /** + * \brief Set sizes of particular segments. 
+ */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size, alignedSize, segmentsCount; + + OffsetsHolder sliceOffsets, sliceSegmentSizes; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/SlicedEllpack.hpp> diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index b6a618e105f30ce43b276eea26ff095fabd1a30d..1512f8574d1e4a29534b61a63f1ebed40c25af82 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -17,7 +17,7 @@ namespace TNL { namespace Matrices { template< typename Real, - template< typename Device_, typename Index_ > class Segments, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments, typename Device = Devices::Host, typename Index = int, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, @@ -27,9 +27,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > public: using RealType = Real; - template< typename Device_, typename Index_ > - using SegmentsTemplate = Segments< Device_, Index_ >; - using SegmentsType = Segments< Device, Index >; + template< typename Device_, typename Index_, typename IndexAllocator_ > + using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; + using SegmentsType = Segments< Device, Index, IndexAllocator >; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; @@ -77,7 +77,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; - template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename 
Index2, typename RealAllocator2, typename IndexAllocator2 > void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); IndexType getNumberOfNonzeroMatrixElements() const; @@ -191,7 +191,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > // cross-device copy assignment template< typename Real2, - template< typename, typename > class Segments2, + template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 0d9ee0b0607e8579b3e89a03dd1c6616bf9b65f6..e24ed2f448b8b9dc8fe7a78f7f0b8af0431fbf77 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -18,7 +18,7 @@ namespace TNL { namespace Matrices { template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -31,7 +31,7 @@ SparseMatrix( const RealAllocatorType& realAllocator, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -43,7 +43,7 @@ SparseMatrix( const SparseMatrix& m ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -55,7 +55,7 @@ SparseMatrix( const SparseMatrix&& m ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -70,7 +70,7 @@ SparseMatrix( const IndexType rows, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class 
Segments, typename Device, typename Index, typename RealAllocator, @@ -86,7 +86,7 @@ getSerializationType() } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -99,7 +99,7 @@ getSerializationTypeVirtual() const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -117,7 +117,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -143,7 +143,7 @@ getCompressedRowLengths( Vector& rowLengths ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -156,7 +156,7 @@ getRowLength( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -170,7 +170,7 @@ getRowLengthFast( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -183,7 +183,7 @@ getNonZeroRowLength( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -197,12 +197,12 @@ getNonZeroRowLengthFast( const IndexType row ) const } template< typename Real, - template< typename, 
typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > - template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) @@ -211,7 +211,7 @@ setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -229,7 +229,7 @@ getNumberOfNonzeroMatrixElements() const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -244,7 +244,7 @@ reset() } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -260,7 +260,7 @@ setElementFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -275,7 +275,7 @@ setElement( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -292,7 +292,7 @@ addElementFast( const 
IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -358,7 +358,7 @@ addElement( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -374,7 +374,7 @@ setRowFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -403,7 +403,7 @@ setRow( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -421,7 +421,7 @@ addRowFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -439,7 +439,7 @@ addRow( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -454,7 +454,7 @@ getElementFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -477,7 +477,7 @@ getElement( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -493,7 +493,7 @@ getRowFast( const IndexType row, } template< typename Real, - 
template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -509,7 +509,7 @@ rowVectorProduct( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -544,7 +544,7 @@ vectorProduct( const InVector& inVector, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -567,7 +567,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -581,7 +581,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -605,7 +605,7 @@ forRows( IndexType first, IndexType last, Function& function ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -619,7 +619,7 @@ forAllRows( Function& function ) const } /*template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -635,7 +635,7 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, 
typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -650,7 +650,7 @@ getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, }*/ template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -668,7 +668,7 @@ performSORIteration( const Vector1& b, // copy assignment template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -686,13 +686,13 @@ operator=( const SparseMatrix& matrix ) // cross-device copy assignment template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > template< typename Real2, - template< typename, typename > class Segments2, + template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, @@ -729,7 +729,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -744,7 +744,7 @@ save( File& file ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -759,7 +759,7 @@ load( File& file ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -772,7 +772,7 @@ save( const String& fileName ) const } template< typename Real, - template< 
typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -785,7 +785,7 @@ load( const String& fileName ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -811,7 +811,7 @@ print( std::ostream& str ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index acc75655f7003f80d920f64ceb710844ccdc376b..5e74f96b039b23102908c772f576b9592c1bb079 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -52,6 +52,16 @@ void test_SetSegmentsSizes_EqualSizes() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); } template< typename Segments > @@ -89,6 +99,16 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + 
EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); } template< typename Segments > @@ -136,6 +156,11 @@ void test_AllReduction_MaximumInSegments() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); + + result_view = 0; + segments.getView().allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); } #endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 2c0514c0aba69e5e26abdf1434b7a2d333c25b63..16c22d9cad957dd62cfaec4fe35303f41a5c002b 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -28,14 +28,14 @@ protected: //// // Row-major format is used for the host system -template< typename Device, typename Index > -using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >; +template< typename Device, typename Index, typename IndexAlocator > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; //// // Column-major format is used for GPUs -template< typename Device, typename Index > -using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 
5efcb1eae0a01e9d4267c8e07cdcd396ecb1015e..8597121e463a1d8349c1d8f93a9dd71a88f576fd 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -28,14 +28,14 @@ protected: //// // Row-major format is used for the host system -template< typename Device, typename Index > -using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; //// // Column-major format is used for GPUs -template< typename Device, typename Index > -using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types