From 9eb2d3180e217147984e9f32a70f65fa4521e700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 8 Apr 2021 21:08:24 +0200
Subject: [PATCH] Writing documentation on CSR segments.

---
 .../Algorithms/Segments/CMakeLists.txt        |   4 +
 .../SegmentsExample_CSR_forElements.cpp       |  49 +++++
 .../SegmentsExample_CSR_forElements.cu        |   1 +
 .../SegmentsExample_CSR_forSegments.cpp       |  52 +++++
 .../SegmentsExample_CSR_forSegments.cu        |   1 +
 .../SegmentsExample_CSR_reduceSegments.cpp    |  69 +++++++
 .../SegmentsExample_CSR_reduceSegments.cu     |   1 +
 ...mentsExample_CSR_sequentialForSegments.cpp |  45 ++++
 ...gmentsExample_CSR_sequentialForSegments.cu |   1 +
 src/TNL/Algorithms/Segments/CSR.h             | 195 ++++++++++++++++--
 src/TNL/Algorithms/Segments/SegmentElement.h  |  33 ++-
 src/TNL/Algorithms/Segments/SegmentView.h     |  89 ++++++++
 .../Algorithms/Segments/SegmentViewIterator.h |  23 +++
 13 files changed, 543 insertions(+), 20 deletions(-)
 create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp
 create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu
 create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp
 create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu
 create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp
 create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu
 create mode 100644 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp
 create mode 120000 Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu

diff --git a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt
index dcc32305e8..8df20f6378 100644
--- a/Documentation/Examples/Algorithms/Segments/CMakeLists.txt
+++ b/Documentation/Examples/Algorithms/Segments/CMakeLists.txt
@@ -6,6 +6,10 @@ set( COMMON_EXAMPLES
    SegmentsExample_CSR_getSegmentsType
    SegmentsExample_CSR_setSegmentsSizes
    SegmentsExample_CSR_getSegmentView
+   SegmentsExample_CSR_forElements
+   SegmentsExample_CSR_forSegments
+   SegmentsExample_CSR_sequentialForSegments
+   SegmentsExample_CSR_reduceSegments
 )
 
 if( BUILD_CUDA )
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp
new file mode 100644
index 0000000000..37267a889f
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cpp
@@ -0,0 +1,49 @@
+#include <iostream>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void SegmentsExample()   // demonstrates CSR::forElements on the given device
+{
+   using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >;
+
+   /***
+    * Create five segments; the initializer list gives the sizes of the particular segments.
+    */
+   const int size( 5 );
+   SegmentsType segments{ 1, 2, 3, 4, 5 };
+
+   /***
+    * Allocate an array for the data managed by the segments.
+    */
+   TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 );
+
+   /***
+    * In each segment, write the segment index to the elements with localIdx <= segmentIdx.
+    */
+   auto data_view = data.getView();   // the lambdas below capture this view by value
+   segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable {
+      if( localIdx <= segmentIdx )
+         data_view[ globalIdx ] = segmentIdx;
+   } );
+
+   /***
+    * Print the data managed by the segments; fetch reads an element by its global index.
+    */
+   auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; };
+   printSegments( segments, fetch, std::cout );
+}
+
+int main( int argc, char* argv[] )   // the command-line arguments are not used
+{
+   std::cout << "Example of CSR segments on host: " << std::endl;
+   SegmentsExample< TNL::Devices::Host >();   // run the example on the host
+
+#ifdef HAVE_CUDA
+   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
+   SegmentsExample< TNL::Devices::Cuda >();   // run the same example on the CUDA device
+#endif   // HAVE_CUDA
+   return EXIT_SUCCESS;
+}
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu
new file mode 120000
index 0000000000..59a419856a
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forElements.cu
@@ -0,0 +1 @@
+SegmentsExample_CSR_forElements.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp
new file mode 100644
index 0000000000..3bf7cc50bd
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp
@@ -0,0 +1,52 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void SegmentsExample()   // demonstrates CSR::forSegments on the given device
+{
+   using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >;
+   using SegmentViewType = typename SegmentsType::SegmentViewType;
+
+   /***
+    * Create five segments; the initializer list gives the sizes of the particular segments.
+    */
+   const int size( 5 );
+   SegmentsType segments{ 1, 2, 3, 4, 5 };
+
+   /***
+    * Allocate an array for the data managed by the segments.
+    */
+   TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 );
+
+   /***
+    * Iterate over the elements of each segment view and write segmentIndex + localIndex where localIndex <= segmentIndex.
+    */
+   auto data_view = data.getView();   // the lambdas below capture this view by value
+   segments.forSegments( 0, size, [=] __cuda_callable__ ( const SegmentViewType& segment ) mutable {
+      for( auto element : segment )
+         if( element.localIndex() <= element.segmentIndex() )
+            data_view[ element.globalIndex() ] = element.segmentIndex() + element.localIndex();
+   } );
+
+   /***
+    * Print the data managed by the segments; fetch reads an element by its global index.
+    */
+   auto fetch = [=] __cuda_callable__ ( int globalIdx ) -> double { return data_view[ globalIdx ]; };
+   printSegments( segments, fetch, std::cout );
+}
+
+int main( int argc, char* argv[] )   // the command-line arguments are not used
+{
+   std::cout << "Example of CSR segments on host: " << std::endl;
+   SegmentsExample< TNL::Devices::Host >();   // run the example on the host
+
+#ifdef HAVE_CUDA
+   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
+   SegmentsExample< TNL::Devices::Cuda >();   // run the same example on the CUDA device
+#endif   // HAVE_CUDA
+   return EXIT_SUCCESS;
+}
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu
new file mode 120000
index 0000000000..07825a0223
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_forSegments.cu
@@ -0,0 +1 @@
+SegmentsExample_CSR_forSegments.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp
new file mode 100644
index 0000000000..f784177af3
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp
@@ -0,0 +1,69 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void SegmentsExample()
+{
+   using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >;
+
+   /***
+    * Create five segments; the initializer list gives the sizes of the particular segments.
+    */
+   const int size( 5 );
+   SegmentsType segments{ 1, 2, 3, 4, 5 };
+
+   /***
+    * Allocate an array for the data managed by the segments.
+    */
+   TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 );
+
+   /***
+    * In each segment, write the segment index to the elements with localIdx <= segmentIdx.
+    */
+   auto data_view = data.getView();
+   segments.forElements( 0, size, [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx ) mutable {
+      if( localIdx <= segmentIdx )
+         data_view[ globalIdx ] = segmentIdx;
+   } );
+
+   /***
+    * Compute sums of elements in each segment, once with each form of the fetch lambda.
+    */
+   TNL::Containers::Vector< double, Device > sums( size );
+   auto sums_view = sums.getView();
+   auto fetch_full = [=] __cuda_callable__ ( int segmentIdx, int localIdx, int globalIdx, bool& compute ) -> double {
+      if( localIdx <= segmentIdx )
+         return data_view[ globalIdx ];
+      else
+      {
+         compute = false;   // hint that the reduction in this segment may stop here
+         return 0.0;
+      }
+   };
+   auto fetch_brief = [=] __cuda_callable__ ( int globalIdx, bool& compute ) -> double {
+      return data_view[ globalIdx ];
+   };
+
+   auto keep = [=] __cuda_callable__ ( int segmentIdx, const double& value  ) mutable {
+      sums_view[ segmentIdx ] = value; };   // one result per segment, so the first argument is the segment index
+   segments.reduceAllSegments( fetch_full, std::plus<>{}, keep, 0.0 );
+   std::cout << "The sums with full fetch form are: " << sums << std::endl;
+   segments.reduceAllSegments( fetch_brief, std::plus<>{}, keep, 0.0 );
+   std::cout << "The sums with brief fetch form are: " << sums << std::endl;
+}
+
+int main( int argc, char* argv[] )   // the command-line arguments are not used
+{
+   std::cout << "Example of CSR segments on host: " << std::endl;
+   SegmentsExample< TNL::Devices::Host >();   // run the example on the host
+
+#ifdef HAVE_CUDA
+   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
+   SegmentsExample< TNL::Devices::Cuda >();   // run the same example on the CUDA device
+#endif   // HAVE_CUDA
+   return EXIT_SUCCESS;
+}
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu
new file mode 120000
index 0000000000..c133b0c2df
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cu
@@ -0,0 +1 @@
+SegmentsExample_CSR_reduceSegments.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp
new file mode 100644
index 0000000000..76affa43b3
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp
@@ -0,0 +1,45 @@
+#include <iostream>
+#include <functional>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Algorithms/SequentialFor.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+template< typename Device >
+void SegmentsExample()   // demonstrates CSR::sequentialForSegments on the given device
+{
+   using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >;
+   using SegmentView = typename SegmentsType::SegmentViewType;
+
+   /***
+    * Create five segments; the initializer list gives the sizes of the particular segments.
+    */
+   const int size( 5 );
+   SegmentsType segments{ 1, 2, 3, 4, 5 };
+
+   /***
+    * Print the mapping of the elements of each segment using the segment view.
+    */
+   std::cout << "Mapping of local indexes to global indexes:" << std::endl;
+
+   auto f = [=] __cuda_callable__ ( const SegmentView& segment ) {
+      printf( "Segment idx. %d: ", segment.getSegmentIndex() );                 // printf works even in GPU kernels
+      for( auto element : segment )
+         printf( "%d -> %d \t", element.localIndex(), element.globalIndex() );
+      printf( "\n" );
+   };
+   segments.sequentialForSegments( 0, size, f );   // f is called for the segments in [ 0, size )
+}
+
+int main( int argc, char* argv[] )   // the command-line arguments are not used
+{
+   std::cout << "Example of CSR segments on host: " << std::endl;
+   SegmentsExample< TNL::Devices::Host >();   // run the example on the host
+
+#ifdef HAVE_CUDA
+   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
+   SegmentsExample< TNL::Devices::Cuda >();   // run the same example on the CUDA device
+#endif   // HAVE_CUDA
+   return EXIT_SUCCESS;
+}
diff --git a/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu
new file mode 120000
index 0000000000..06e162fd7f
--- /dev/null
+++ b/Documentation/Examples/Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cu
@@ -0,0 +1 @@
+SegmentsExample_CSR_sequentialForSegments.cpp
\ No newline at end of file
diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h
index af05a9f614..f3f1aa8810 100644
--- a/src/TNL/Algorithms/Segments/CSR.h
+++ b/src/TNL/Algorithms/Segments/CSR.h
@@ -294,46 +294,196 @@ class CSR
       OffsetsContainer& getOffsets();
 
       /**
-       * \brief Go over all segments and for each segment element call
-       * function 'f'. The return type of 'f' is bool.
-       * When its true, the for-loop continues. Once 'f' returns false, the for-loop
-       * is terminated.
+       * \brief Iterate over all elements of given segments in parallel and call given lambda function.
+       *
+       * \tparam Function is a type of the lambda function to be performed on each element.
+       * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on
+       *    elements of which we want to apply the lambda function.
+       * \param end defines end of an interval [ \e begin, \e end ) of segments on
+       *    elements of which we want to apply the lambda function.
+       * \param function is the lambda function to be applied on the elements of the segments.
+       *
+       * Declaration of the lambda function \e function is supposed to be
+       *
+       * ```
+       * auto f = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx ) {...} 
+       * ```
+       * where \e segmentIdx is the index of the segment the given element belongs to, \e localIdx is the rank of the element
+       * within the segment and \e globalIdx is index of the element within the related container.
+       *
+       * \par Example
+       * \include Algorithms/Segments/SegmentsExample_CSR_forElements.cpp
+       * \par Output
+       * \include SegmentsExample_CSR_forElements.out
        */
       template< typename Function >
-      void forElements( IndexType begin, IndexType end, Function&& f ) const;
+      void forElements( IndexType begin, IndexType end, Function&& function ) const;
 
+      /**
+       * \brief Call \ref TNL::Algorithms::Segments::CSR::forElements for all elements of the segments.
+       *
+       * See \ref TNL::Algorithms::Segments::CSR::forElements for more details.
+       */
       template< typename Function >
-      void forAllElements( Function&& f ) const;
+      void forAllElements( Function&& function ) const;
 
+      /**
+       * \brief Iterate over all segments in parallel and call given lambda function.
+       *
+       * \tparam Function is a type of the lambda function to be performed on each segment.
+       * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on
+       *    which we want to apply the lambda function.
+       * \param end defines end of an interval [ \e begin, \e end ) of segments on
+       *    which we want to apply the lambda function.
+       * \param function is the lambda function to be applied on the segments.
+       *
+       *  Declaration of the lambda function \e function is supposed to be
+       *
+       * ```
+       * auto f = [=] __cuda_callable__ ( const SegmentView& segment ) {...}
+       * ```
+       * where \e segment represents given segment (see \ref TNL::Algorithms::Segments::SegmentView).
+       * Its type is given by \ref SegmentViewType.
+       *
+       * \par Example
+       * \include Algorithms/Segments/SegmentsExample_CSR_forSegments.cpp
+       * \par Output
+       * \include SegmentsExample_CSR_forSegments.out
+       */
       template< typename Function >
-      void forSegments( IndexType begin, IndexType end, Function&& f ) const;
+      void forSegments( IndexType begin, IndexType end, Function&& function ) const;
 
+      /**
+       * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments for all segments.
+       *
+       * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details.
+       */
       template< typename Function >
-      void forAllSegments( Function&& f ) const;
+      void forAllSegments( Function&& function ) const;
 
+      /**
+       * \brief Call \ref TNL::Algorithms::Segments::CSR::forSegments sequentially for particular segments.
+       *
+       * With this method, the given segments are processed sequentially one-by-one. This is useful for example
+       * for printing of segments based data structures or for debugging reasons.
+       *
+       * \param begin defines beginning of an interval [ \e begin, \e end ) of segments on
+       *    which we want to apply the lambda function.
+       * \param end defines end of an interval [ \e begin, \e end ) of segments on
+       *    which we want to apply the lambda function.
+       * \param function is the lambda function to be applied on the segments.
+       *
+       * See \ref TNL::Algorithms::Segments::CSR::forSegments for more details.
+       *
+       * \par Example
+       * \include Algorithms/Segments/SegmentsExample_CSR_sequentialForSegments.cpp
+       * \par Output
+       * \include SegmentsExample_CSR_sequentialForSegments.out
+       */
       template< typename Function >
-      void sequentialForSegments( IndexType begin, IndexType end, Function&& f ) const;
+      void sequentialForSegments( IndexType begin, IndexType end, Function&& function ) const;
 
+      /**
+       * \brief Call \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for all segments.
+       *
+       * See \ref TNL::Algorithms::Segments::CSR::sequentialForSegments for more details.
+       */
       template< typename Function >
       void sequentialForAllSegments( Function&& f ) const;
 
-
-      /***
-       * \brief Go over all segments and perform a reduction in each of them.
+      /**
+       * \brief Compute reduction in each segment.
+       *
+       * \tparam Fetch is type of lambda function for data fetching.
+       * \tparam Reduce is a reduction operation.
+       * \tparam Keep is lambda function for storing results from particular segments.
+       *
+       * \param begin defines beginning of an interval [ \e begin, \e end ) of segments in
+       *    which we want to perform the reduction.
+       * \param end defines end of an interval [ \e begin, \e end ) of segments in
+       *    which we want to perform the reduction.
+       * \param fetch is a lambda function for fetching of data. It is supposed to have one of the
+       *  following forms:
+       * 1. Full form
+       *  ```
+       *  auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) { ... }
+       *  ```
+       * 2. Brief form
+       * ```
+       * auto fetch = [=] __cuda_callable__ ( IndexType globalIdx, bool& compute ) { ... }
+       * ```
+       * where for both variants \e segmentIdx is segment index, \e localIdx is a rank of element in the segment, \e globalIdx is index of the element
+       * in related container and \e compute is a boolean variable which serves for stopping the reduction if it is set to \e false. It is, however,
+       * only a hint and the real behaviour depends on the type of kernel used for the reduction.
+       * Some kernels are optimized so that they can be significantly faster with the brief variant of the \e fetch lambda function.
+       * \param reduce is a lambda function representing the reduction operation. It is supposed to be defined as:
+       *
+       * ```
+       * auto reduce = [=] __cuda_callable__ ( const Value& a, const Value& b ) -> Value { ... }
+       * ```
+       *
+       * where \e a and \e b are values to be reduced and the lambda function returns result of the reduction.
+       * \param keep is a lambda function for saving results from particular segments. It is supposed to be defined as:
+       *
+       * ```
+       * auto keep = [=] __cuda_callable__ ( IndexType segmentIdx, const Value& value ) { ... }
+       * ```
+       *
+       * where \e segmentIdx is an index of the segment and \e value is the result of the reduction in given segment to be stored.
+       *
+       * \par Example
+       * \include Algorithms/Segments/SegmentsExample_CSR_reduceSegments.cpp
+       * \par Output
+       * \include SegmentsExample_CSR_reduceSegments.out
        */
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real >
-      void reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename Value >
+      void reduceSegments( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const;
 
-      template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real >
-      void reduceAllSegments( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const;
+      /**
+       * \brief Call \ref TNL::Algorithms::Segments::CSR::reduceSegments for all segments.
+       *
+       * See \ref TNL::Algorithms::Segments::CSR::reduceSegments for more details.
+       */
+      template< typename Fetch, typename Reduce, typename Keep, typename Value >
+      void reduceAllSegments( Fetch& fetch, const Reduce& reduce, Keep& keep, const Value& zero ) const;
 
-      CSR& operator=( const CSR& rhsSegments ) = default;
+      /**
+       * \brief Assignment operator.
+       *
+       * It makes a deep copy of the source segments.
+       *
+       * \param source are the CSR segments to be assigned.
+       * \return reference to this instance.
+       */
+      CSR& operator=( const CSR& source ) = default;
 
+      /**
+       * \brief Assignment operator with CSR segments with different template parameters.
+       *
+       * It makes a deep copy of the source segments.
+       *
+       * \tparam Device_ is device type of the source segments.
+       * \tparam Index_ is the index type of the source segments.
+       * \tparam Kernel_ is the kernel type of the source segments.
+       * \tparam IndexAllocator_ is the index allocator of the source segments.
+       * \param source is the source segments object.
+       * \return reference to this instance.
+       */
       template< typename Device_, typename Index_, typename Kernel_, typename IndexAllocator_ >
       CSR& operator=( const CSR< Device_, Index_, Kernel_, IndexAllocator_ >& source );
 
+      /**
+       * \brief Method for saving the segments to a file in a binary form.
+       *
+       * \param file is the target file.
+       */
       void save( File& file ) const;
 
+      /**
+       * \brief Method for loading the segments from a file in a binary form.
+       *
+       * \param file is the source file.
+       */
       void load( File& file );
 
    protected:
@@ -343,6 +493,17 @@ class CSR
       KernelType kernel;
 };
 
+/**
+ * \brief Insertion operator of CSR segments to output stream.
+ *
+ * \tparam Device is the device type of the source segments.
+ * \tparam Index is the index type of the source segments.
+ * \tparam Kernel is kernel type of the source segments.
+ * \tparam IndexAllocator is the index allocator of the source segments.
+ * \param str is the output stream.
+ * \param segments are the source segments.
+ * \return reference to the output stream.
+ */
 template< typename Device,
           typename Index,
           typename Kernel,
diff --git a/src/TNL/Algorithms/Segments/SegmentElement.h b/src/TNL/Algorithms/Segments/SegmentElement.h
index 68088ba22c..71f78cdd37 100644
--- a/src/TNL/Algorithms/Segments/SegmentElement.h
+++ b/src/TNL/Algorithms/Segments/SegmentElement.h
@@ -18,26 +18,55 @@ namespace TNL {
    namespace Algorithms {
       namespace Segments {
 
-
+/**
+ * \brief Simple structure representing one element of a segment.
+ *
+ * \tparam Index is type used for indexing of the elements.
+ */
 template< typename Index >
 class SegmentElement
 {
    public:
 
+      /**
+       * \brief Type used for indexing of the elements.
+       */
       using IndexType = Index;
 
+      /**
+       * \brief Constructor of the segment element with all parameters.
+       *
+       * \param segmentIdx is an index of the parent segment.
+       * \param localIdx is a rank of the element in the segment.
+       * \param globalIdx is an index of the element in the related container.
+       */
       __cuda_callable__
       SegmentElement( const IndexType& segmentIdx,
                       const IndexType& localIdx,
                       const IndexType globalIdx )
       : segmentIdx( segmentIdx ), localIdx( localIdx ), globalIdx( globalIdx ) {};
 
+      /**
+       * \brief Returns index of the parent segment.
+       *
+       * \return index of the parent segment.
+       */
       __cuda_callable__
       const IndexType& segmentIndex() const { return segmentIdx; };
 
+      /**
+       * \brief Returns rank of the element in the segment.
+       *
+       * \return rank of the element in the segment.
+       */
       __cuda_callable__
       const IndexType& localIndex() const { return localIdx; };
 
+      /**
+       * \brief Returns index of the element in the related container.
+       *
+       * \return index of the element in the related container.
+       */
       __cuda_callable__
       const IndexType& globalIndex() const { return globalIdx; };
 
@@ -48,8 +77,6 @@ class SegmentElement
       const IndexType& localIdx;
 
       const IndexType globalIdx;
-
-
 };
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/SegmentView.h b/src/TNL/Algorithms/Segments/SegmentView.h
index 399e3ddd14..aac6e0a940 100644
--- a/src/TNL/Algorithms/Segments/SegmentView.h
+++ b/src/TNL/Algorithms/Segments/SegmentView.h
@@ -17,19 +17,48 @@ namespace TNL {
    namespace Algorithms {
       namespace Segments {
 
+/**
+ * \brief Data structure for accessing particular segment.
+ *
+ * \tparam Index is type for indexing elements in related segments.
+ *
+ * See the template specializations \ref TNL::Algorithms::Segments::SegmentView< Index, ColumnMajorOrder >
+ *  and \ref TNL::Algorithms::Segments::SegmentView< Index, RowMajorOrder > for column-major
+ * and row-major elements organization respectively. They have equivalent interface.
+ */
 template< typename Index,
           ElementsOrganization Organization >
 class SegmentView;
 
+
+/**
+ * \brief Data structure for accessing particular segment.
+ *
+ * \tparam Index is type for indexing elements in related segments.
+ */
 template< typename Index >
 class SegmentView< Index, ColumnMajorOrder >
 {
    public:
 
+      /**
+       * \brief Type for indexing elements in related segments.
+       */
       using IndexType = Index;
 
+      /**
+       * \brief Type of iterator for iterating over elements of the segment.
+       */
       using IteratorType = SegmentViewIterator< SegmentView >;
 
+      /**
+       * \brief Constructor with all parameters.
+       *
+       * \param segmentIdx is an index of segment the segment view will point to.
+       * \param offset is an offset of the segment in the parent segments.
+       * \param size is a size of the segment.
+       * \param step is stepping between neighbouring elements in the segment.
+       */
       __cuda_callable__
       SegmentView( const IndexType segmentIdx,
                    const IndexType offset,
@@ -37,16 +66,32 @@ class SegmentView< Index, ColumnMajorOrder >
                    const IndexType step )
       : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ), step( step ){};
 
+      /**
+       * \brief Copy constructor.
+       *
+       * \param view is the source view.
+       */
       __cuda_callable__
       SegmentView( const SegmentView& view )
       : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){};
 
+      /**
+       * \brief Get the size of the segment, i.e. number of elements in the segment.
+       *
+       * \return number of elements in the segment.
+       */
       __cuda_callable__
       const IndexType& getSize() const
       {
          return this->segmentSize;
       };
 
+      /**
+       * \brief Get global index of an element with rank \e localIndex in the segment.
+       *
+       * \param localIndex is the rank of the element in the segment.
+       * \return global index of the element.
+       */
       __cuda_callable__
       IndexType getGlobalIndex( const IndexType localIndex ) const
       {
@@ -54,6 +99,11 @@ class SegmentView< Index, ColumnMajorOrder >
          return segmentOffset + localIndex * step;
       };
 
+      /**
+       * \brief Get index of the segment.
+       *
+       * \return index of the segment.
+       */
       __cuda_callable__
       const IndexType& getSegmentIndex() const
       {
@@ -102,10 +152,24 @@ class SegmentView< Index, RowMajorOrder >
 {
    public:
 
+      /**
+       * \brief Type for indexing elements in related segments.
+       */
       using IndexType = Index;
 
+      /**
+       * \brief Type of iterator for iterating over elements of the segment.
+       */
       using IteratorType = SegmentViewIterator< SegmentView >;
 
+      /**
+       * \brief Constructor with all parameters.
+       *
+       * \param segmentIdx is an index of segment the segment view will point to.
+       * \param offset is an offset of the segment in the parent segments.
+       * \param size is a size of the segment.
+       * \param step is ignored in this specialization; it is accepted only for compatibility with the column-major specialization.
+       */
       __cuda_callable__
       SegmentView( const IndexType segmentIdx,
                    const IndexType offset,
@@ -113,12 +177,32 @@ class SegmentView< Index, RowMajorOrder >
                    const IndexType step = 1 ) // For compatibility with previous specialization
       : segmentIdx( segmentIdx ), segmentOffset( offset ), segmentSize( size ){};
 
+      /**
+       * \brief Copy constructor; copies the segment index, offset and size of the source view.
+       *
+       * \param view is the source view.
+       */
+      __cuda_callable__
+      SegmentView( const SegmentView& view )
+      : segmentIdx( view.segmentIdx ), segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ) {};
+
+      /**
+       * \brief Get the size of the segment, i.e. number of elements in the segment.
+       *
+       * \return number of elements in the segment.
+       */
       __cuda_callable__
       const IndexType& getSize() const
       {
          return this->segmentSize;
       };
 
+      /**
+       * \brief Get global index of an element with rank \e localIndex in the segment.
+       *
+       * \param localIndex is the rank of the element in the segment.
+       * \return global index of the element.
+       */
       __cuda_callable__
       IndexType getGlobalIndex( const IndexType localIndex ) const
       {
@@ -126,6 +210,11 @@ class SegmentView< Index, RowMajorOrder >
          return segmentOffset + localIndex;
       };
 
+      /**
+       * \brief Get index of the segment.
+       *
+       * \return index of the segment.
+       */
       __cuda_callable__
       const IndexType& getSegmentIndex() const
       {
diff --git a/src/TNL/Algorithms/Segments/SegmentViewIterator.h b/src/TNL/Algorithms/Segments/SegmentViewIterator.h
index 335ce91aa1..a0e7888326 100644
--- a/src/TNL/Algorithms/Segments/SegmentViewIterator.h
+++ b/src/TNL/Algorithms/Segments/SegmentViewIterator.h
@@ -19,6 +19,13 @@ namespace TNL {
    namespace Algorithms {
       namespace Segments {
 
+/**
+ * \brief Iterator for iterating over elements of a segment.
+ *
+ * The iterator can be used even in GPU kernels.
+ *
+ * \tparam SegmentView is a type of related segment view.
+ */
 template< typename SegmentView >
 class SegmentViewIterator
 {
@@ -61,12 +68,28 @@ class SegmentViewIterator
       __cuda_callable__
       bool operator!=( const SegmentViewIterator& other ) const;
 
+      /**
+       * \brief Operator for incrementing the iterator, i.e. moving to the next element.
+       *
+       * \return reference to this iterator.
+       */
       __cuda_callable__
       SegmentViewIterator& operator++();
 
+      /**
+       * \brief Operator for decrementing the iterator, i.e. moving to the previous element.
+       *
+       * \return reference to this iterator.
+       */
       __cuda_callable__
       SegmentViewIterator& operator--();
 
+      /**
+       * \brief Operator for dereferencing the iterator.
+       *
+       * It returns structure \ref SegmentElementType which represents one element of a segment.
+       * \return segment element the iterator points to.
+       */
       __cuda_callable__
       const SegmentElementType operator*() const;
 
-- 
GitLab