Commit 496d4e77 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Writting documentation on reduction.

parent 48b7a7e0
Loading
Loading
Loading
Loading
+162 −0
Original line number Diff line number Diff line
@@ -22,12 +22,59 @@ namespace TNL {
namespace Containers {
namespace Algorithms {

/**
 * \brief Reduction implements [(parallel) reduction](https://en.wikipedia.org/wiki/Reduce_(parallel_pattern)) for vectors and arrays.
 *
 * Reduction can be used for operations having one or more vectors (or arrays) elements is input and returning
 * one number (or element) as output. Some examples of such operations can be vectors/arrays comparison,
 * vector norm, scalar product of two vectors or computing minimum or maximum. If one needs to know even
 * position of the smallest or the largest element, reduction with argument can be used.
 *
 * \tparam Device this parameter says on what device the reduction is gonna be performed.
 * 
 * See \ref Reduction< Devices::Host > and \ref Reduction< Devices::Cuda >.
 */
template< typename Device >
struct Reduction;

template<>
struct Reduction< Devices::Host >
{
   /**
    * \brief Computes reduction on CPU.
    *
    * \tparam Index is a type for indexing.
    * \tparam Result is a type of the reduction result.
    * \tparam ReductionOperation is a lambda function performing the reduction.
    * \tparam DataFetcher is a lambda function for fetching the input data.
    *
    * \param size is number of elements to be reduced.
    * \param reduction is a lambda function defining the reduction operation.
    * \param dataFetcher is a lambda function fetching the input data.
    * \param zero is the idempotent element for the reduction operation, i.e. element which
    *             does not change the result of the reduction.
    * \return result of the reduction
    *
    * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
    *
    * ```
    * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
    * ```
    *
    * The reduction lambda function takes two variables which are supposed to be reduced:
    *
    * ```
    * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
    * ```
    *
    * \par Example
    *
    * \include ReductionAndScan/SumExample.cpp
    *
    * \par Output
    *
    * \include SumExample.out
    */
   template< typename Index,
             typename Result,
             typename ReductionOperation,
@@ -38,6 +85,46 @@ struct Reduction< Devices::Host >
           DataFetcher& dataFetcher,
           const Result& zero );

   /**
    * \brief Computes reduction on CPU and returns position of an element of interest.
    * 
    * For example in case of computing minimal or maximal element in array/vector, 
    * the position of the element having given value can be obtained. The use of this method
    * is, however, more flexible.
    * 
    * \tparam Index is a type for indexing.
    * \tparam Result is a type of the reduction result.
    * \tparam ReductionOperation is a lambda function performing the reduction.
    * \tparam DataFetcher is a lambda function for fetching the input data.
    * 
    * \param size is number of elements to be reduced.
    * \param reduction is a lambda function defining the reduction operation and managing the elements positions.
    * \param dataFetcher is a lambda function fetching the input data.
    * \param zero is the idempotent element for the reduction operation, i.e. element which
    *             does not change the result of the reduction.
    * \return result of the reduction in a form of std::pair< Index, Result> structure. `pair.first'
    *         is the element position and `pair.second` is the reduction result.
    * 
    * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
    * 
    * ```
    * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
    * ```
    * 
    * The reduction lambda function takes two variables which are supposed to be reduced:
    * 
    * ```
    * auto reduction = [] __cuda_callable__ ( Index& aIdx, const Index& bIdx, const Result& a, const Result& b ) { return ... };
    * ```
    * 
    * \par Example
    * 
    * \include ReductionAndScan/ReductionWithArgument.cpp
    * 
    * \par Output
    * 
    * \include ReductionWithArgument.out
    */
   template< typename Index,
             typename Result,
             typename ReductionOperation,
@@ -52,6 +139,41 @@ struct Reduction< Devices::Host >
template<>
struct Reduction< Devices::Cuda >
{
   /**
    * \brief Computes reduction on GPU.
    *
    * \tparam Index is a type for indexing.
    * \tparam Result is a type of the reduction result.
    * \tparam ReductionOperation is a lambda function performing the reduction.
    * \tparam DataFetcher is a lambda function for fetching the input data.
    *
    * \param size is number of elements to be reduced.
    * \param reduction is a lambda function defining the reduction operation.
    * \param dataFetcher is a lambda function fetching the input data.
    * \param zero is the idempotent element for the reduction operation, i.e. element which
    *             does not change the result of the reduction.
    * \return result of the reduction
    *
    * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
    *
    * ```
    * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
    * ```
    *
    * The reduction lambda function takes two variables which are supposed to be reduced:
    *
    * ```
    * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
    * ```
    *
    * \par Example
    *
    * \include ReductionAndScan/SumExample.cpp
    *
    * \par Output
    *
    * \include SumExample.out
    */
   template< typename Index,
             typename Result,
             typename ReductionOperation,
@@ -62,6 +184,46 @@ struct Reduction< Devices::Cuda >
           DataFetcher& dataFetcher,
           const Result& zero );

   /**
    * \brief Computes reduction on GPU and returns position of an element of interest.
    *
    * For example in case of computing minimal or maximal element in array/vector, 
    * the position of the element having given value can be obtained. The use of this method
    * is, however, more flexible.
    *
    * \tparam Index is a type for indexing.
    * \tparam Result is a type of the reduction result.
    * \tparam ReductionOperation is a lambda function performing the reduction.
    * \tparam DataFetcher is a lambda function for fetching the input data.
    *
    * \param size is number of elements to be reduced.
    * \param reduction is a lambda function defining the reduction operation and managing the elements positions.
    * \param dataFetcher is a lambda function fetching the input data.
    * \param zero is the idempotent element for the reduction operation, i.e. element which
    *             does not change the result of the reduction.
    * \return result of the reduction in a form of std::pair< Index, Result> structure. `pair.first'
    *         is the element position and `pair.second` is the reduction result.
    *
    * The dataFetcher lambda function takes one argument which is index of the element to be fetched:
    *
    * ```
    * auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
    * ```
    *
    * The reduction lambda function takes two variables which are supposed to be reduced:
    *
    * ```
    * auto reduction = [] __cuda_callable__ ( Index& aIdx, const Index& bIdx, const Result& a, const Result& b ) { return ... };
    * ```
    *
    * \par Example
    *
    * \include ReductionAndScan/ReductionWithArgument.cpp
    *
    * \par Output
    *
    * \include ReductionWithArgument.out
    */
   template< typename Index,
             typename Result,
             typename ReductionOperation,