From 9eb2d3180e217147984e9f32a70f65fa4521e700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Thu, 8 Apr 2021 21:08:24 +0200 Subject: [PATCH] Writting documentation on CSR segments. --- .../Algorithms/Segments/CMakeLists.txt | 4 + .../SegmentsExample_CSR_forElements.cpp | 49 +++++ .../SegmentsExample_CSR_forElements.cu | 1 + .../SegmentsExample_CSR_forSegments.cpp | 52 +++++ .../SegmentsExample_CSR_forSegments.cu | 1 + .../SegmentsExample_CSR_reduceSegments.cpp | 69 +++++++ .../SegmentsExample_CSR_reduceSegments.cu | 1 + ...mentsExample_CSR_sequentialForSegments.cpp | 45 ++++ ...gmentsExample_CSR_sequentialForSegments.cu | 1 + src/TNL/Algorithms/Segments/CSR.h | 195 ++++++++++++++++-- src/TNL/Algorithms/Segments/SegmentElement.h | 33 ++- src/TNL/Algorithms/Segments/SegmentView.h | 89 ++++++++ .../Algorithms/Segments/SegmentViewIterator.h | 23 +++ 13 files changed, 543 insertions(+), 20 deletions(-) create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu diff --git a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt index dcc32305e8..8df20f6378 100644 --- 
a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt +++ b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt @@ -6,6 +6,10 @@ set( COMMON_EXAMPLES SegmentsExample_CSR_getSegmentsType SegmentsExample_CSR_setSegmentsSizes SegmentsExample_CSR_getSegmentView + SegmentsExample_CSR_forElements + SegmentsExample_CSR_forSegments + SegmentsExample_CSR_sequentialForSegments + SegmentsExample_CSR_reduceSegments ) if( BUILD_CUDA ) diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp new file mode 100644 index 0000000000..37267a889f --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp @@ -0,0 +1,49 @@ +#include <iostream> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. + */ + auto data_view = data.getView(); + segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { + if( localIdx <= segmentIdx ) + data_view[ globalIdx ] = segmentIdx; + } ); + + /*** + * Print the data managed by the segments. 
+ */ + auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; + printSegments( segments, fetch, std::cout ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu new file mode 120000 index 0000000000..59a419856a --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_forElements.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp new file mode 100644 index 0000000000..3bf7cc50bd --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp @@ -0,0 +1,52 @@ +#include <iostream> +#include <functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. 
+ */ + auto data_view = data.getView(); + segments.forSegments( 0, size, [=] __cuda_callable__ ( const SegmentViewType& segment ) mutable { + for( auto element : segment ) + if( element.localIndex() <= element.segmentIndex() ) + data_view[ element.globalIndex() ] = element.segmentIndex() + element.localIndex(); + } ); + + /*** + * Print the data managed by the segments. + */ + auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; + printSegments( segments, fetch, std::cout ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu new file mode 120000 index 0000000000..07825a0223 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_forSegments.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp new file mode 100644 index 0000000000..f784177af3 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp @@ -0,0 +1,69 @@ +#include <iostream> +#include <functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + + /*** + * Create segments with given segments sizes. 
+ */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. + */ + auto data_view = data.getView(); + segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { + if( localIdx <= segmentIdx ) + data_view[ globalIdx ] = segmentIdx; + } ); + + /*** + * Compute sums of elements in each segment. + */ + TNL::Containers::Vector< double, Device > sums( size ); + auto sums_view = sums.getView(); + auto fetch_full = [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx, bool& compute ) -> double { + if( localIdx <= segmentIdx ) + return data_view[ globalIdx ]; + else + { + compute = false; + return 0.0; + } + }; + auto fetch_brief = [=] __cuda_callable__ ( int globalIdx, bool& compute ) -> double { + return data_view[ globalIdx ]; + }; + + auto keep = [=] __cuda_callable__ ( int globalIdx, const double& value ) mutable { + sums_view[ globalIdx ] = value; }; + segments.reduceAllSegments( fetch_full, std::plus<>{}, keep, 0.0 ); + std::cout << "The sums with full fetch form are: " << sums << std::endl; + segments.reduceAllSegments( fetch_brief, std::plus<>{}, keep, 0.0 ); + std::cout << "The sums with brief fetch form are: " << sums << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu new file mode 120000 index 0000000000..c133b0c2df --- /dev/null +++ 
b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_reduceSegments.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp new file mode 100644 index 0000000000..76affa43b3 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp @@ -0,0 +1,45 @@ +#include <iostream> +#include <functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Algorithms/SequentialFor.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + using SegmentView = typename SegmentsType::SegmentViewType; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Print the elements mapping using segment view. + */ + std::cout << "Mapping of local indexes to global indexes:" << std::endl; + + auto f = [=] __cuda_callable__ ( const SegmentView& segment ) { + printf( "Segment idx. 
%d: ", segment.getSegmentIndex() ); // printf works even in GPU kernels + for( auto element : segment ) + printf( "%d -> %d \t", element.localIndex(), element.globalIndex() ); + printf( "\n" ); + }; + segments.sequentialForSegments( 0, size, f ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu new file mode 120000 index 0000000000..06e162fd7f --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_sequentialForSegments.cpp \ No newline at end of file diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h index af05a9f614..f3f1aa8810 100644 --- a/src/TNL/Algorithms/Segments/CSR.h +++ b/src/TNL/Algorithms/Segments/CSR.h @@ -294,46 +294,196 @@ class CSR OffsetsContainer& getOffsets(); /** - * \brief Go over all segments and for each segment element call - * function 'f'. The return type of 'f' is bool. - * When its true, the for-loop continues. Once 'f' returns false, the for-loop - * is terminated. + * \brief Iterate over all elements of given segments in parallel and call given lambda function. + * + * \tparam Function is a type of the lambda function to be performed on each element. + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param end defines end of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. 
+ * \param function is the lambda function to be applied on the elements of the segments. + * + * Declaration of the lambda function \e function is supposed to be + * + * ``` + * auto f = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx ) {...} + * ``` + * where \e segmentIdx is index of the segment the given element belongs to, \e localIdx is rank of the element + * within the segment and \e globalIdx is index of the element within the related container. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_forElements.cpp + * \par Output + * \include SegmentsExample_CSR_forElements.out */ template< typename Function > - void forElements( IndexType begin, IndexType end, Function&& f ) const; + void forElements( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forElements for all elements of the segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forElements for more details. + */ template< typename Function > - void forAllElements( Function&& f ) const; + void forAllElements( Function&& function ) const; + /** + * \brief Iterate over all segments in parallel and call given lambda function. + * + * \tparam Function is a type of the lambda function to be performed on each segment. + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param end defines end of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param function is the lambda function to be applied on the elements of the segments. + * + * Declaration of the lambda function \e function is supposed to be + * + * ``` + * auto f = [=] __cuda_callable__ ( const SegmentView& segment ) {...} + * ``` + * where \e segment represents given segment (see \ref TNL::Algorithms::Segments::SegmentView). 
+ * Its type is given by \ref SegmentViewType. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp + * \par Output + * \include SegmentsExample_CSR_forSegments.out + */ template< typename Function > - void forSegments( IndexType begin, IndexType end, Function&& f ) const; + void forSegments( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments for all segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details. + */ template< typename Function > - void forAllSegments( Function&& f ) const; + void forAllSegments( Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments sequentially for particular segments. + * + * With this method, the given segments are processed sequentially one-by-one. This is useful for example + * for printing of segments based data structures or for debugging reasons. + * + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param end defines end of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param function is the lambda function to be applied on the elements of the segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp + * \par Output + * \include SegmentsExample_CSR_sequentialForSegments.out + */ template< typename Function > - void sequentialForSegments( IndexType begin, IndexType end, Function&& f ) const; + void sequentialForSegments( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for all segments. 
+ * + * See \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for more details. + */ template< typename Function > void sequentialForAllSegments( Function&& f ) const; - - /*** - * \brief Go over all segments and perform a reduction in each of them. + /** + * \brief Compute reduction in each segment. + * + * \tparam Fetch is type of lambda function for data fetching. + * \tparam Reduce is a reduction operation. + * \tparam Keep is lambda function for storing results from particular segments. + * + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments in + * which we want to perform the reduction. + * \param end defines end of an interval [ \e begin, \e end ) of segments in + * which we want to perform the reduction. + * \param fetch is a lambda function for fetching of data. It is supposed to have one of the + * following forms: + * 1. Full form + * ``` + * auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) { ... } + * ``` + * 2. Brief form + * ``` + * auto fetch = [=] __cuda_callable__ ( IndexType globalIdx, bool& compute ) { ... } + * ``` + * where for both variants \e segmentIdx is segment index, \e localIdx is a rank of element in the segment, \e globalIdx is index of the element + * in related container and \e compute is a boolean variable which serves for stopping the reduction if it is set to \e false. It is, however, + * only a hint and the real behaviour depends on type of kernel used for the reduction. + * Some kernels are optimized so that they can be significantly faster with the brief variant of the \e fetch lambda function. + * \param reduce is a lambda function representing the reduction operation. It is supposed to be defined as: + * + * ``` + * auto reduce = [=] __cuda_callable__ ( const Value& a, const Value& b ) -> Value { ... } + * ``` + * + * where \e a and \e b are values to be reduced and the lambda function returns result of the reduction. 
+ * \param keep is a lambda function for saving results from particular segments. It is supposed to be defined as: + * + * ``` + * auto keep = [=] __cuda_callable__ ( IndexType segmentIdx, const Value& value ) { ... } + * ``` + * + * where \e segmentIdx is an index of the segment and \e value is the result of the reduction in given segment to be stored. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp + * \par Output + * \include SegmentsExample_CSR_reduceSegments.out */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real > - void reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const; + template< typename Fetch, typename Reduce, typename Keep, typename Value > + void reduceSegments( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const; - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real > - void reduceAllSegments( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::reduceSegments for all segments. + * + * See \ref TNL::Algorithms::Segments::CSR::reduceSegments for more details. + */ + template< typename Fetch, typename Reduce, typename Keep, typename Value > + void reduceAllSegments( Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const; - CSR& operator=( const CSR& rhsSegments ) = default; + /** + * \brief Assignment operator. + * + * It makes a deep copy of the source segments. + * + * \param source are the CSR segments to be assigned. + * \return reference to this instance. + */ + CSR& operator=( const CSR& source ) = default; + /** + * \brief Assignment operator with CSR segments with different template parameters. + * + * It makes a deep copy of the source segments. 
+ * + * \tparam Device_ is device type of the source segments. + * \tparam Index_ is the index type of the source segments. + * \tparam Kernel_ is the kernel type of the source segments. + * \tparam IndexAllocator_ is the index allocator of the source segments. + * \param source is the source segments object. + * \return reference to this instance. + */ template< typename Device_, typename Index_, typename Kernel_, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, Kernel_, IndexAllocator_ >& source ); + /** + * \brief Method for saving the segments to a file in a binary form. + * + * \param file is the target file. + */ void save( File& file ) const; + /** + * \brief Method for loading the segments from a file in a binary form. + * + * \param file is the source file. + */ void load( File& file ); protected: @@ -343,6 +493,17 @@ class CSR KernelType kernel; }; +/** + * \brief Insertion operator of CSR segments to output stream. + * + * \tparam Device is the device type of the source segments. + * \tparam Index is the index type of the source segments. + * \tparam Kernel is kernel type of the source segments. + * \tparam IndexAllocator is the index allocator of the source segments. + * \param str is the output stream. + * \param segments are the source segments. + * \return reference to the output stream. + */ template< typename Device, typename Index, typename Kernel, diff --git a/src/TNL/Algorithms/Segments/SegmentElement.h b/src/TNL/Algorithms/Segments/SegmentElement.h index 68088ba22c..71f78cdd37 100644 --- a/src/TNL/Algorithms/Segments/SegmentElement.h +++ b/src/TNL/Algorithms/Segments/SegmentElement.h @@ -18,26 +18,55 @@ namespace TNL { namespace Algorithms { namespace Segments { - +/** + * \brief Simple structure representing one element of a segment. + * + * \tparam Index is type used for indexing of the elements. + */ template< typename Index > class SegmentElement { public: + /** + * \brief Type used for indexing of the elements. 
+ */ using IndexType = Index; + /** + * \brief Constructor of the segment element with all parameters. + * + * \param segmentIdx is an index of the parent segment. + * \param localIdx is a rank of the element in the segment. + * \param globalIdx is an index of the element in the related container. + */ __cuda_callable__ SegmentElement( const IndexType& segmentIdx, const IndexType& localIdx, const IndexType globalIdx ) : segmentIdx( segmentIdx ), localIdx( localIdx ), globalIdx( globalIdx ) {}; + /** + * \brief Returns index of the parent segment. + * + * \return index of the parent segment. + */ __cuda_callable__ const IndexType& segmentIndex() const { return segmentIdx; }; + /** + * \brief Returns rank of the element in the segment. + * + * \return rank of the element in the segment. + */ __cuda_callable__ const IndexType& localIndex() const { return localIdx; }; + /** + * \brief Returns index of the element in the related container. + * + * \return index of the element in the related container. + */ __cuda_callable__ const IndexType& globalIndex() const { return globalIdx; }; @@ -48,8 +77,6 @@ class SegmentElement const IndexType& localIdx; const IndexType globalIdx; - - }; } // namespace Segments diff --git a/src/TNL/Algorithms/Segments/SegmentView.h b/src/TNL/Algorithms/Segments/SegmentView.h index 399e3ddd14..aac6e0a940 100644 --- a/src/TNL/Algorithms/Segments/SegmentView.h +++ b/src/TNL/Algorithms/Segments/SegmentView.h @@ -17,19 +17,48 @@ namespace TNL { namespace Algorithms { namespace Segments { +/** + * \brief Data structure for accessing particular segment. + * + * \tparam Index is type for indexing elements in related segments. + * + * See the template specializations \ref TNL::Algorithms::Segments::SegmentView< Index, ColumnMajorOrder > + * and \ref TNL::Algorithms::Segments::SegmentView< Index, RowMajorOrder > for column-major + * and row-major elements organization respectively. They have equivalent interface. 
+ */ template< typename Index, ElementsOrganization Organization > class SegmentView; + +/** + * \brief Data structure for accessing particular segment. + * + * \tparam Index is type for indexing elements in related segments. + */ template< typename Index > class SegmentView< Index, ColumnMajorOrder > { public: + /** + * \brief Type for indexing elements in related segments. + */ using IndexType = Index; + /** + * \brief Type of iterator for iterating over elements of the segment. + */ using IteratorType = SegmentViewIterator< SegmentView >; + /** + * \brief Constructor with all parameters. + * + * \param segmentIdx is an index of segment the segment view will point to. + * \param offset is an offset of the segment in the parent segments. + * \param size is a size of the segment. + * \param step is stepping between neighbouring elements in the segment. + */ __cuda_callable__ SegmentView( const IndexType segmentIdx, const IndexType offset, @@ -37,16 +66,32 @@ class SegmentView< Index, ColumnMajorOrder > const IndexType step ) : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ), step( step ){}; + /** + * \brief Copy constructor. + * + * \param view is the source view. + */ __cuda_callable__ SegmentView( const SegmentView& view ) : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){}; + /** + * \brief Get the size of the segment, i.e. number of elements in the segment. + * + * \return number of elements in the segment. + */ __cuda_callable__ const IndexType& getSize() const { return this->segmentSize; }; + /** + * \brief Get global index of an element with rank \e localIndex in the segment. + * + * \param localIndex is the rank of the element in the segment. + * \return global index of the element. 
+ */ __cuda_callable__ IndexType getGlobalIndex( const IndexType localIndex ) const { @@ -54,6 +99,11 @@ class SegmentView< Index, ColumnMajorOrder > return segmentOffset + localIndex * step; }; + /** + * \brief Get index of the segment. + * + * \return index of the segment. + */ __cuda_callable__ const IndexType& getSegmentIndex() const { @@ -102,10 +152,24 @@ class SegmentView< Index, RowMajorOrder > { public: + /** + * \brief Type for indexing elements in related segments. + */ using IndexType = Index; + /** + * \brief Type of iterator for iterating over elements of the segment. + */ using IteratorType = SegmentViewIterator< SegmentView >; + /** + * \brief Constructor with all parameters. + * + * \param segmentIdx is an index of segment the segment view will point to. + * \param offset is an offset of the segment in the parent segments. + * \param size is a size of the segment. + * \param step is stepping between neighbouring elements in the segment. + */ __cuda_callable__ SegmentView( const IndexType segmentIdx, const IndexType offset, @@ -113,12 +177,32 @@ class SegmentView< Index, RowMajorOrder > const IndexType step = 1 ) // For compatibility with previous specialization : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ){}; + /** + * \brief Copy constructor. + * + * \param view is the source view. + */ + __cuda_callable__ + SegmentView( const SegmentView& view ) + : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ) {}; + + /** + * \brief Get the size of the segment, i.e. number of elements in the segment. + * + * \return number of elements in the segment. + */ __cuda_callable__ const IndexType& getSize() const { return this->segmentSize; }; + /** + * \brief Get global index of an element with rank \e localIndex in the segment. + * + * \param localIndex is the rank of the element in the segment. + * \return global index of the element. 
+ */ __cuda_callable__ IndexType getGlobalIndex( const IndexType localIndex ) const { @@ -126,6 +210,11 @@ class SegmentView< Index, RowMajorOrder > return segmentOffset + localIndex; }; + /** + * \brief Get index of the segment. + * + * \return index of the segment. + */ __cuda_callable__ const IndexType& getSegmentIndex() const { diff --git a/src/TNL/Algorithms/Segments/SegmentViewIterator.h b/src/TNL/Algorithms/Segments/SegmentViewIterator.h index 335ce91aa1..a0e7888326 100644 --- a/src/TNL/Algorithms/Segments/SegmentViewIterator.h +++ b/src/TNL/Algorithms/Segments/SegmentViewIterator.h @@ -19,6 +19,13 @@ namespace TNL { namespace Algorithms { namespace Segments { +/** + * \brief Iterator for iterating over elements of a segment. + * + * The iterator can be used even in GPU kernels. + * + * \tparam SegmentView is a type of related segment view. + */ template< typename SegmentView > class SegmentViewIterator { @@ -61,12 +68,28 @@ class SegmentViewIterator __cuda_callable__ bool operator!=( const SegmentViewIterator& other ) const; + /** + * \brief Operator for incrementing the iterator, i.e. moving to the next element. + * + * \return reference to this iterator. + */ __cuda_callable__ SegmentViewIterator& operator++(); + /** + * \brief Operator for decrementing the iterator, i.e. moving to the previous element. + * + * \return reference to this iterator. + */ __cuda_callable__ SegmentViewIterator& operator--(); + /** + * \brief Operator for dereferencing the iterator. + * + * It returns structure \ref SegmentElementType which represents one element of a segment. + * \return segment element the iterator points to. + */ __cuda_callable__ const SegmentElementType operator*() const; -- GitLab