diff --git a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt index dcc32305e8143f3229ebe19021a413f18ce1849e..8df20f63789e1b8438cfc3b58fe26eac3306363e 100644 --- a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt +++ b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt @@ -6,6 +6,10 @@ set( COMMON_EXAMPLES SegmentsExample_CSR_getSegmentsType SegmentsExample_CSR_setSegmentsSizes SegmentsExample_CSR_getSegmentView + SegmentsExample_CSR_forElements + SegmentsExample_CSR_forSegments + SegmentsExample_CSR_sequentialForSegments + SegmentsExample_CSR_reduceSegments ) if( BUILD_CUDA ) diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..37267a889fd87a5880e24de6d0d0ed1a83d6402b --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp @@ -0,0 +1,49 @@ +#include <iostream> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. + */ + auto data_view = data.getView(); + segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { + if( localIdx <= segmentIdx ) + data_view[ globalIdx ] = segmentIdx; + } ); + + /*** + * Print the data managed by the segments. 
+ */ + auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; + printSegments( segments, fetch, std::cout ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu new file mode 120000 index 0000000000000000000000000000000000000000..59a419856a1162f6c75d916b1f7492ea1dee30f0 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_forElements.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3bf7cc50bd46dd6dc2bc72d78d9bc47700fa2589 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp @@ -0,0 +1,52 @@ +#include <iostream> +#include <functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + + /*** + * Create segments with given segments sizes. 
+ */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. + */ + auto data_view = data.getView(); + segments.forSegments( 0, size, [=] __cuda_callable__ ( const SegmentViewType& segment ) mutable { + for( auto element : segment ) + if( element.localIndex() <= element.segmentIndex() ) + data_view[ element.globalIndex() ] = element.segmentIndex() + element.localIndex(); + } ); + + /*** + * Print the data managed by the segments. + */ + auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; }; + printSegments( segments, fetch, std::cout ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu new file mode 120000 index 0000000000000000000000000000000000000000..07825a022346bb3100dd6c7261a32a7f8335e4a1 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_forSegments.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f784177af3985fc1587a0e6838222116e9134fe1 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp @@ -0,0 +1,69 @@ +#include <iostream> +#include 
<functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Allocate array for the segments; + */ + TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 ); + + /*** + * Insert data into particular segments. + */ + auto data_view = data.getView(); + segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable { + if( localIdx <= segmentIdx ) + data_view[ globalIdx ] = segmentIdx; + } ); + + /*** + * Compute sums of elements in each segment. + */ + TNL::Containers::Vector< double, Device > sums( size ); + auto sums_view = sums.getView(); + auto fetch_full = [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx, bool& compute ) -> double { + if( localIdx <= segmentIdx ) + return data_view[ globalIdx ]; + else + { + compute = false; + return 0.0; + } + }; + auto fetch_brief = [=] __cuda_callable__ ( int globalIdx, bool& compute ) -> double { + return data_view[ globalIdx ]; + }; + + auto keep = [=] __cuda_callable__ ( int globalIdx, const double& value ) mutable { + sums_view[ globalIdx ] = value; }; + segments.reduceAllSegments( fetch_full, std::plus<>{}, keep, 0.0 ); + std::cout << "The sums with full fetch form are: " << sums << std::endl; + segments.reduceAllSegments( fetch_brief, std::plus<>{}, keep, 0.0 ); + std::cout << "The sums with brief fetch form are: " << sums << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << 
std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu new file mode 120000 index 0000000000000000000000000000000000000000..c133b0c2df3ed29adab10546a5b8435508d4abd0 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_reduceSegments.cpp \ No newline at end of file diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp new file mode 100644 index 0000000000000000000000000000000000000000..76affa43b33283601ef44550975af51d5a45aa26 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp @@ -0,0 +1,45 @@ +#include <iostream> +#include <functional> +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Algorithms/SequentialFor.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +template< typename Device > +void SegmentsExample() +{ + using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >; + using SegmentView = typename SegmentsType::SegmentViewType; + + /*** + * Create segments with given segments sizes. + */ + const int size( 5 ); + SegmentsType segments{ 1, 2, 3, 4, 5 }; + + /*** + * Print the elements mapping using segment view. + */ + std::cout << "Mapping of local indexes to global indexes:" << std::endl; + + auto f = [=] __cuda_callable__ ( const SegmentView& segment ) { + printf( "Segment idx. 
%d: ", segment.getSegmentIndex() ); // printf works even in GPU kernels + for( auto element : segment ) + printf( "%d -> %d \t", element.localIndex(), element.globalIndex() ); + printf( "\n" ); + }; + segments.sequentialForSegments( 0, size, f ); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Example of CSR segments on host: " << std::endl; + SegmentsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Example of CSR segments on CUDA GPU: " << std::endl; + SegmentsExample< TNL::Devices::Cuda >(); +#endif + return EXIT_SUCCESS; +} diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu new file mode 120000 index 0000000000000000000000000000000000000000..06e162fd7f3752949bad9b90a5147d012ee5ebe1 --- /dev/null +++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu @@ -0,0 +1 @@ +SegmentsExample_CSR_sequentialForSegments.cpp \ No newline at end of file diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h index af05a9f614f1a79f58d20e3b28285f8c495058dd..f3f1aa88106a3ed9401b23ec27226ebb49b190c0 100644 --- a/src/TNL/Algorithms/Segments/CSR.h +++ b/src/TNL/Algorithms/Segments/CSR.h @@ -294,46 +294,196 @@ class CSR OffsetsContainer& getOffsets(); /** - * \brief Go over all segments and for each segment element call - * function 'f'. The return type of 'f' is bool. - * When its true, the for-loop continues. Once 'f' returns false, the for-loop - * is terminated. + * \brief Iterate over all elements of given segments in parallel and call given lambda function. + * + * \tparam Function is a type of the lambda function to be performed on each element. + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. 
+ * \param end defines end of an interval [ \e begin, \e end ) of segments on + * elements of which we want to apply the lambda function. + * \param function is the lambda function to be applied on the elements of the segments. + * + * Declaration of the lambda function \e function is supposed to be + * + * ``` + * auto f = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx ) {...} + * ``` + * where \e segmentIdx is index of the segment the given element belongs to, \e localIdx is rank of the element + * within the segment and \e globalIdx is index of the element within the related container. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_forElements.cpp + * \par Output + * \include SegmentsExample_CSR_forElements.out */ template< typename Function > - void forElements( IndexType begin, IndexType end, Function&& f ) const; + void forElements( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forElements for all elements of the segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forElements for more details. + */ template< typename Function > - void forAllElements( Function&& f ) const; + void forAllElements( Function&& function ) const; + /** + * \brief Iterate over all segments in parallel and call given lambda function. + * + * \tparam Function is a type of the lambda function to be performed on each segment. + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * which we want to apply the lambda function. + * \param end defines end of an interval [ \e begin, \e end ) of segments on + * which we want to apply the lambda function. + * \param function is the lambda function to be applied on the segments. 
+ * + * Declaration of the lambda function \e function is supposed to be + * + * ``` + * auto f = [=] __cuda_callable__ ( const SegmentView& segment ) {...} + * ``` + * where \e segment represents given segment (see \ref TNL::Algorithms::Segments::SegmentView). + * Its type is given by \ref SegmentViewType. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp + * \par Output + * \include SegmentsExample_CSR_forSegments.out + */ template< typename Function > - void forSegments( IndexType begin, IndexType end, Function&& f ) const; + void forSegments( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments for all segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details. + */ template< typename Function > - void forAllSegments( Function&& f ) const; + void forAllSegments( Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments sequentially for particular segments. + * + * With this method, the given segments are processed sequentially one-by-one. This is useful, for example, + * for printing of segments based data structures or for debugging purposes. + * + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on + * which we want to apply the lambda function. + * \param end defines end of an interval [ \e begin, \e end ) of segments on + * which we want to apply the lambda function. + * \param function is the lambda function to be applied on the segments. + * + * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details. 
+ * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp + * \par Output + * \include SegmentsExample_CSR_sequentialForSegments.out + */ template< typename Function > - void sequentialForSegments( IndexType begin, IndexType end, Function&& f ) const; + void sequentialForSegments( IndexType begin, IndexType end, Function&& function ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for all segments. + * + * See \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for more details. + */ template< typename Function > void sequentialForAllSegments( Function&& f ) const; - - /*** - * \brief Go over all segments and perform a reduction in each of them. + /** + * \brief Compute reduction in each segment. + * + * \tparam Fetch is type of lambda function for data fetching. + * \tparam Reduce is a reduction operation. + * \tparam Keep is lambda function for storing results from particular segments. + * + * \param begin defines beginning of an interval [ \e begin, \e end ) of segments in + * which we want to perform the reduction. + * \param end defines end of an interval [ \e begin, \e end ) of segments in + * which we want to perform the reduction. + * \param fetch is a lambda function for fetching of data. It is supposed to have one of the + * following forms: + * 1. Full form + * ``` + * auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) { ... } + * ``` + * 2. Brief form + * ``` + * auto fetch = [=] __cuda_callable__ ( IndexType globalIdx, bool& compute ) { ... } + * ``` + * where for both variants \e segmentIdx is segment index, \e localIdx is a rank of element in the segment, \e globalIdx is index of the element + * in related container and \e compute is a boolean variable which serves for stopping the reduction if it is set to \e false. 
It is, however, + * only a hint and the real behaviour depends on the type of kernel used for the reduction. + * Some kernels are optimized so that they can be significantly faster with the brief variant of the \e fetch lambda function. + * \param reduce is a lambda function representing the reduction operation. It is supposed to be defined as: + * + * ``` + * auto reduce = [=] __cuda_callable__ ( const Value& a, const Value& b ) -> Value { ... } + * ``` + * + * where \e a and \e b are values to be reduced and the lambda function returns result of the reduction. + * \param keep is a lambda function for saving results from particular segments. It is supposed to be defined as: + * + * ``` + * auto keep = [=] __cuda_callable__ ( IndexType segmentIdx, const Value& value ) { ... } + * ``` + * + * where \e segmentIdx is an index of the segment and \e value is the result of the reduction in given segment to be stored. + * + * \par Example + * \include Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp + * \par Output + * \include SegmentsExample_CSR_reduceSegments.out */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real > - void reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const; + template< typename Fetch, typename Reduce, typename Keep, typename Value > + void reduceSegments( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const; - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real > - void reduceAllSegments( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const; + /** + * \brief Call \ref TNL::Algorithms::Segments::CSR::reduceSegments for all segments. + * + * See \ref TNL::Algorithms::Segments::CSR::reduceSegments for more details. 
+ */ + template< typename Fetch, typename Reduce, typename Keep, typename Value > + void reduceAllSegments( Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const; - CSR& operator=( const CSR& rhsSegments ) = default; + /** + * \brief Assignment operator. + * + * It makes a deep copy of the source segments. + * + * \param source are the CSR segments to be assigned. + * \return reference to this instance. + */ + CSR& operator=( const CSR& source ) = default; + /** + * \brief Assignment operator with CSR segments with different template parameters. + * + * It makes a deep copy of the source segments. + * + * \tparam Device_ is device type of the source segments. + * \tparam Index_ is the index type of the source segments. + * \tparam Kernel_ is the kernel type of the source segments. + * \tparam IndexAllocator_ is the index allocator of the source segments. + * \param source is the source segments object. + * \return reference to this instance. + */ template< typename Device_, typename Index_, typename Kernel_, typename IndexAllocator_ > CSR& operator=( const CSR< Device_, Index_, Kernel_, IndexAllocator_ >& source ); + /** + * \brief Method for saving the segments to a file in a binary form. + * + * \param file is the target file. + */ void save( File& file ) const; + /** + * \brief Method for loading the segments from a file in a binary form. + * + * \param file is the source file. + */ void load( File& file ); protected: @@ -343,6 +493,17 @@ class CSR KernelType kernel; }; +/** + * \brief Insertion operator of CSR segments to output stream. + * + * \tparam Device is the device type of the source segments. + * \tparam Index is the index type of the source segments. + * \tparam Kernel is kernel type of the source segments. + * \tparam IndexAllocator is the index allocator of the source segments. + * \param str is the output stream. + * \param segments are the source segments. + * \return reference to the output stream. 
+ */ template< typename Device, typename Index, typename Kernel, diff --git a/src/TNL/Algorithms/Segments/SegmentElement.h b/src/TNL/Algorithms/Segments/SegmentElement.h index 68088ba22cc03a295be6b8e6fc4f622d0a52a4bc..71f78cdd37e3e1b0f019079ac4735c7c8f3ca50b 100644 --- a/src/TNL/Algorithms/Segments/SegmentElement.h +++ b/src/TNL/Algorithms/Segments/SegmentElement.h @@ -18,26 +18,55 @@ namespace TNL { namespace Algorithms { namespace Segments { - +/** + * \brief Simple structure representing one element of a segment. + * + * \tparam Index is type used for indexing of the elements. + */ template< typename Index > class SegmentElement { public: + /** + * \brief Type used for indexing of the elements. + */ using IndexType = Index; + /** + * \brief Constructor of the segment element with all parameters. + * + * \param segmentIdx is an index of the parent segment. + * \param localIdx is a rank of the element in the segment. + * \param globalIdx is an index of the element in the related container. + */ __cuda_callable__ SegmentElement( const IndexType& segmentIdx, const IndexType& localIdx, const IndexType globalIdx ) : segmentIdx( segmentIdx ), localIdx( localIdx ), globalIdx( globalIdx ) {}; + /** + * \brief Returns index of the parent segment. + * + * \return index of the parent segment. + */ __cuda_callable__ const IndexType& segmentIndex() const { return segmentIdx; }; + /** + * \brief Returns rank of the element in the segment. + * + * \return rank of the element in the segment. + */ __cuda_callable__ const IndexType& localIndex() const { return localIdx; }; + /** + * \brief Returns index of the element in the related container. + * + * \return index of the element in the related container. 
+ */ __cuda_callable__ const IndexType& globalIndex() const { return globalIdx; }; @@ -48,8 +77,6 @@ class SegmentElement const IndexType& localIdx; const IndexType globalIdx; - - }; } // namespace Segments diff --git a/src/TNL/Algorithms/Segments/SegmentView.h b/src/TNL/Algorithms/Segments/SegmentView.h index 399e3ddd140801443bf324faa08e11952d101fed..aac6e0a9408844ea07133779dc75f73113047d0f 100644 --- a/src/TNL/Algorithms/Segments/SegmentView.h +++ b/src/TNL/Algorithms/Segments/SegmentView.h @@ -17,19 +17,48 @@ namespace TNL { namespace Algorithms { namespace Segments { +/** + * \brief Data structure for accessing particular segment. + * + * \tparam Index is type for indexing elements in related segments. + * + * See the template specializations \ref TNL::Algorithms::Segments::SegmentView< Index, ColumnMajorOrder > + * and \ref TNL::Algorithms::Segments::SegmentView< Index, RowMajorOrder > for column-major + * and row-major elements organization respectively. They have equivalent interface. + */ template< typename Index, ElementsOrganization Organization > class SegmentView; + +/** + * \brief Data structure for accessing particular segment. + * + * \tparam Index is type for indexing elements in related segments. + */ template< typename Index > class SegmentView< Index, ColumnMajorOrder > { public: + /** + * \brief Type for indexing elements in related segments. + */ using IndexType = Index; + /** + * \brief Type of iterator for iterating over elements of the segment. + */ using IteratorType = SegmentViewIterator< SegmentView >; + /** + * \brief Constructor with all parameters. + * + * \param segmentIdx is an index of segment the segment view will point to. + * \param offset is an offset of the segment in the parent segments. + * \param size is a size of the segment. + * \param step is stepping between neighbouring elements in the segment. 
+ */ __cuda_callable__ SegmentView( const IndexType segmentIdx, const IndexType offset, @@ -37,16 +66,32 @@ class SegmentView< Index, ColumnMajorOrder > const IndexType step ) : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ), step( step ){}; + /** + * \brief Copy constructor. + * + * \param view is the source view. + */ __cuda_callable__ SegmentView( const SegmentView& view ) : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){}; + /** + * \brief Get the size of the segment, i.e. number of elements in the segment. + * + * \return number of elements in the segment. + */ __cuda_callable__ const IndexType& getSize() const { return this->segmentSize; }; + /** + * \brief Get global index of an element with rank \e localIndex in the segment. + * + * \param localIndex is the rank of the element in the segment. + * \return global index of the element. + */ __cuda_callable__ IndexType getGlobalIndex( const IndexType localIndex ) const { @@ -54,6 +99,11 @@ class SegmentView< Index, ColumnMajorOrder > return segmentOffset + localIndex * step; }; + /** + * \brief Get index of the segment. + * + * \return index of the segment. + */ __cuda_callable__ const IndexType& getSegmentIndex() const { @@ -102,10 +152,24 @@ class SegmentView< Index, RowMajorOrder > { public: + /** + * \brief Type for indexing elements in related segments. + */ using IndexType = Index; + /** + * \brief Type of iterator for iterating over elements of the segment. + */ using IteratorType = SegmentViewIterator< SegmentView >; + /** + * \brief Constructor with all parameters. + * + * \param segmentIdx is an index of segment the segment view will point to. + * \param offset is an offset of the segment in the parent segments. + * \param size is a size of the segment. + * \param step is not used by this specialization, it is accepted only for compatibility with the column-major one. 
+ */ __cuda_callable__ SegmentView( const IndexType segmentIdx, const IndexType offset, @@ -113,12 +177,32 @@ class SegmentView< Index, RowMajorOrder > const IndexType step = 1 ) // For compatibility with previous specialization : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ){}; + /** + * \brief Copy constructor. + * + * \param view is the source view. + */ + __cuda_callable__ + SegmentView( const SegmentView& view ) + : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ) {}; + + /** + * \brief Get the size of the segment, i.e. number of elements in the segment. + * + * \return number of elements in the segment. + */ __cuda_callable__ const IndexType& getSize() const { return this->segmentSize; }; + /** + * \brief Get global index of an element with rank \e localIndex in the segment. + * + * \param localIndex is the rank of the element in the segment. + * \return global index of the element. + */ __cuda_callable__ IndexType getGlobalIndex( const IndexType localIndex ) const { @@ -126,6 +210,11 @@ class SegmentView< Index, RowMajorOrder > return segmentOffset + localIndex; }; + /** + * \brief Get index of the segment. + * + * \return index of the segment. + */ __cuda_callable__ const IndexType& getSegmentIndex() const { diff --git a/src/TNL/Algorithms/Segments/SegmentViewIterator.h b/src/TNL/Algorithms/Segments/SegmentViewIterator.h index 335ce91aa1f1903ca9eed64319b88ea4d7cd0c2e..a0e78883268a0b77dcc0069ed9ceecd36d43fbaf 100644 --- a/src/TNL/Algorithms/Segments/SegmentViewIterator.h +++ b/src/TNL/Algorithms/Segments/SegmentViewIterator.h @@ -19,6 +19,13 @@ namespace TNL { namespace Algorithms { namespace Segments { +/** + * \brief Iterator for iterating over elements of a segment. + * + * The iterator can be used even in GPU kernels. + * + * \tparam SegmentView is a type of related segment view. 
+ */ template< typename SegmentView > class SegmentViewIterator { @@ -61,12 +68,28 @@ class SegmentViewIterator __cuda_callable__ bool operator!=( const SegmentViewIterator& other ) const; + /** + * \brief Operator for incrementing the iterator, i.e. moving to the next element. + * + * \return reference to this iterator. + */ __cuda_callable__ SegmentViewIterator& operator++(); + /** + * \brief Operator for decrementing the iterator, i.e. moving to the previous element. + * + * \return reference to this iterator. + */ __cuda_callable__ SegmentViewIterator& operator--(); + /** + * \brief Operator for dereferencing the iterator. + * + * It returns structure \ref SegmentElementType which represents one element of a segment. + * \return segment element the iterator points to. + */ __cuda_callable__ const SegmentElementType operator*() const;