From 2bd090cccf878ce1cede67d855d15e0ce0b3e6d2 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 3 Dec 2019 20:46:36 +0100
Subject: [PATCH] Added Ellpack segments.

---
 src/TNL/Containers/Segments/CSR.h       |   5 +-
 src/TNL/Containers/Segments/CSR.hpp     |   2 +-
 src/TNL/Containers/Segments/Ellpack.h   | 129 ++++++++++++
 src/TNL/Containers/Segments/Ellpack.hpp | 250 ++++++++++++++++++++++++
 4 files changed, 381 insertions(+), 5 deletions(-)
 create mode 100644 src/TNL/Containers/Segments/Ellpack.h
 create mode 100644 src/TNL/Containers/Segments/Ellpack.hpp

diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h
index e3eff23427..52ca36e22c 100644
--- a/src/TNL/Containers/Segments/CSR.h
+++ b/src/TNL/Containers/Segments/CSR.h
@@ -16,7 +16,6 @@ namespace TNL {
    namespace Containers {
       namespace Segments {
 
-
 template< typename Device,
           typename Index >
 class CSR
@@ -36,7 +35,7 @@ class CSR
       CSR( const CSR&& segments );
 
       /**
-       * \brief Set sizes of particular segmenets.
+       * \brief Set sizes of particular segments.
        */
       template< typename SizesHolder = OffsetsHolder >
       void setSizes( const SizesHolder& sizes );
@@ -88,9 +87,7 @@ class CSR
    protected:
 
       OffsetsHolder offsets;
-
 };
-
       } // namespace Segements
    } // namespace Conatiners
 } // namespace TNL
diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index c99611958e..ecd52190c7 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -136,7 +136,7 @@ CSR< Device, Index >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets.getView();
-   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) {
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
       const IndexType end = offsetsView[ i + 1 ];
       for( IndexType j = begin; j < end; j++ )
diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h
new file mode 100644
index 0000000000..49f859afb4
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.h
@@ -0,0 +1,129 @@
+/***************************************************************************
+                          Ellpack.h -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+/**
+ * \brief Segments which all have the same number of slots.
+ *
+ * The common slot count is the largest size passed to setSizes(); the
+ * slots of one segment are stored next to each other.
+ *
+ * \tparam Device is the device the segments are processed on.
+ * \tparam Index is the type used for indexing.
+ * \tparam Alignment is the multiple to which the number of segments is
+ *         rounded up when computing the aligned size.
+ */
+template< typename Device,
+          typename Index,
+          int Alignment = 32 >
+class Ellpack
+{
+   public:
+
+      using DeviceType = Device;
+      using IndexType = Index;
+      using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >;
+      static constexpr int getAlignment() { return Alignment; }
+
+      Ellpack();
+
+      Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes );
+
+      Ellpack( const Ellpack& segments );
+
+      Ellpack( const Ellpack&& segments );
+
+      /**
+       * \brief Set sizes of particular segments.
+       */
+      template< typename SizesHolder = OffsetsHolder >
+      void setSizes( const SizesHolder& sizes );
+
+      /**
+       * \brief Number of segments.
+       */
+      __cuda_callable__
+      IndexType getSize() const;
+
+      /**
+       * \brief Number of slots of a segment; the same for all segments.
+       */
+      __cuda_callable__
+      IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+      /**
+       * \brief Number of slots needed to store all segments.
+       */
+      __cuda_callable__
+      IndexType getStorageSize() const;
+
+      /**
+       * \brief Position of slot \e localIdx of segment \e segmentIdx.
+       */
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+      /**
+       * \brief Inverse of getGlobalIndex(). Requires non-zero segment size.
+       */
+      __cuda_callable__
+      void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+      /***
+       * \brief Go over all segments and for each segment element call
+       * function 'f' with arguments 'args'. The return type of 'f' is bool.
+       * When it is true, the for-loop continues. Once 'f' returns false, the for-loop
+       * is terminated.
+       */
+      template< typename Function, typename... Args >
+      void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+      template< typename Function, typename... Args >
+      void forAll( Function& f, Args... args ) const;
+
+
+      /***
+       * \brief Go over all segments and perform a reduction in each of them.
+       */
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+      void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+      /**
+       * \brief Write the segment size, number of segments and aligned size to a file.
+       */
+      void save( File& file ) const;
+
+      /**
+       * \brief Read the values written by save().
+       */
+      void load( File& file );
+
+   protected:
+
+      // Slots per segment, number of segments, and the number of segments
+      // rounded up to a multiple of Alignment.
+      IndexType segmentSize, size, alignedSize;
+};
+
+      } // namespace Segments
+   } // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/Segments/Ellpack.hpp>
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp
new file mode 100644
index 0000000000..0b62405146
--- /dev/null
+++ b/src/TNL/Containers/Segments/Ellpack.hpp
@@ -0,0 +1,250 @@
+/***************************************************************************
+                          Ellpack.hpp -  description
+                             -------------------
+    begin                : Dec 3, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+
+namespace TNL {
+   namespace Containers {
+      namespace Segments {
+
+
+// Creates empty segments: there are no segments and no slots.
+template< typename Device,
+          typename Index,
+          int Alignment >
+Ellpack< Device, Index, Alignment >::
+Ellpack()
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+}
+
+// Creates segments that can hold the given sizes, see setSizes().
+template< typename Device,
+          typename Index,
+          int Alignment >
+Ellpack< Device, Index, Alignment >::
+Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes )
+   : segmentSize( 0 ), size( 0 ), alignedSize( 0 )
+{
+   this->setSizes( sizes );
+}
+
+// Copies the layout description of another instance.
+template< typename Device,
+          typename Index,
+          int Alignment >
+Ellpack< Device, Index, Alignment >::
+Ellpack( const Ellpack& ellpack )
+   : segmentSize( ellpack.segmentSize ),
+     size( ellpack.size ),
+     alignedSize( ellpack.alignedSize )
+{
+}
+
+// The argument is const, so there is nothing to steal: the three index
+// members are copied exactly as in the copy constructor.
+template< typename Device,
+          typename Index,
+          int Alignment >
+Ellpack< Device, Index, Alignment >::
+Ellpack( const Ellpack&& ellpack )
+   : segmentSize( ellpack.segmentSize ),
+     size( ellpack.size ),
+     alignedSize( ellpack.alignedSize )
+{
+}
+
+// Sets the number of segments to sizes.getSize() and the common segment
+// size to the largest element of sizes.
+template< typename Device,
+          typename Index,
+          int Alignment >
+   template< typename SizesHolder >
+void
+Ellpack< Device, Index, Alignment >::
+setSizes( const SizesHolder& sizes )
+{
+   this->size = sizes.getSize();
+   // An empty container has no largest element, so use zero slots then.
+   this->segmentSize = 0;
+   if( this->size > 0 )
+      this->segmentSize = max( sizes );
+   // Round the number of segments up to the nearest multiple of Alignment.
+   this->alignedSize = ( this->size + Alignment - 1 ) / Alignment * Alignment;
+}
+
+// Returns the number of segments.
+template< typename Device,
+          typename Index,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, Alignment >::
+getSize() const
+{
+   return this->size;
+}
+
+// Returns the number of slots of a segment; segmentIdx is not needed
+// because all segments have the same size.
+template< typename Device,
+          typename Index,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, Alignment >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+   return this->segmentSize;
+}
+
+// Returns the number of slots needed to store all segments.
+template< typename Device,
+          typename Index,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, Alignment >::
+getStorageSize() const
+{
+   return this->size * this->segmentSize;
+}
+
+// Returns the position of slot localIdx of segment segmentIdx; the slots
+// of one segment follow each other.
+template< typename Device,
+          typename Index,
+          int Alignment >
+__cuda_callable__
+Index
+Ellpack< Device, Index, Alignment >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+   return segmentIdx * this->segmentSize + localIdx;
+}
+
+// Inverse of getGlobalIndex(). The segment size must not be zero.
+template< typename Device,
+          typename Index,
+          int Alignment >
+__cuda_callable__
+void
+Ellpack< Device, Index, Alignment >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+   segmentIdx = globalIdx / this->segmentSize;
+   localIdx = globalIdx % this->segmentSize;
+}
+
+// Calls f( segmentIdx, globalIdx, args... ) for the slots of the segments
+// first, ..., last - 1 and leaves a segment once f returns false.
+template< typename Device,
+          typename Index,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index, Alignment >::
+forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
+{
+   // Copy the member into a local so that the lambda does not capture 'this'.
+   const IndexType slots = this->segmentSize;
+   // 'mutable' as in CSR::forSegments, f is called on the lambda's own copy.
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = i * slots;
+      const IndexType end = begin + slots;
+      for( IndexType j = begin; j < end; j++ )
+         if( ! f( i, j, args... ) )
+            break;
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+// Calls forSegments() for all segments.
+template< typename Device,
+          typename Index,
+          int Alignment >
+   template< typename Function, typename... Args >
+void
+Ellpack< Device, Index, Alignment >::
+forAll( Function& f, Args... args ) const
+{
+   this->forSegments( 0, this->getSize(), f, args... );
+}
+
+// For each of the segments first, ..., last - 1 folds the values
+// fetch( segmentIdx, globalIdx, args... ) of its slots with reduction,
+// starting from zero, and hands the result to keeper( segmentIdx, result ).
+template< typename Device,
+          typename Index,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index, Alignment >::
+segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   using RealType = decltype( fetch( IndexType(), IndexType() ) );
+   // Copy the member into a local so that the lambda does not capture 'this'.
+   const IndexType slots = this->segmentSize;
+   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
+      const IndexType begin = i * slots;
+      const IndexType end = begin + slots;
+      RealType aux( zero );
+      for( IndexType j = begin; j < end; j++ )
+         reduction( aux, fetch( i, j, args... ) );
+      keeper( i, aux );
+   };
+   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+
+// Calls segmentsReduction() for all segments.
+template< typename Device,
+          typename Index,
+          int Alignment >
+   template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+void
+Ellpack< Device, Index, Alignment >::
+allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
+{
+   this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... );
+}
+
+// Writes the segment size, number of segments and aligned size to the file.
+template< typename Device,
+          typename Index,
+          int Alignment >
+void
+Ellpack< Device, Index, Alignment >::
+save( File& file ) const
+{
+   file.save( &this->segmentSize );
+   file.save( &this->size );
+   file.save( &this->alignedSize );
+}
+
+// Reads the values in the order in which save() wrote them.
+template< typename Device,
+          typename Index,
+          int Alignment >
+void
+Ellpack< Device, Index, Alignment >::
+load( File& file )
+{
+   file.load( &this->segmentSize );
+   file.load( &this->size );
+   file.load( &this->alignedSize );
+}
+
+      } // namespace Segments
+   } // namespace Containers
+} // namespace TNL
+-- 
+GitLab