Commit 496d4e77 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber

Writing documentation on reduction.

parent 48b7a7e0
......@@ -22,12 +22,59 @@ namespace TNL {
namespace Containers {
namespace Algorithms {
/**
* \brief Reduction implements [(parallel) reduction](https://en.wikipedia.org/wiki/Reduce_(parallel_pattern)) for vectors and arrays.
*
* Reduction can be used for operations taking elements of one or more vectors (or arrays) as input and returning
* one number (or element) as output. Examples of such operations are comparison of vectors/arrays,
* vector norm, scalar product of two vectors, or computing the minimum or maximum. If the position
* of the smallest or the largest element is needed as well, reduction with argument can be used.
*
* \tparam Device says on which device the reduction is to be performed.
*
* See \ref Reduction< Devices::Host > and \ref Reduction< Devices::Cuda >.
*/
template< typename Device >
struct Reduction;
template<>
struct Reduction< Devices::Host >
{
/**
* \brief Computes reduction on CPU.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
* \tparam ReductionOperation is a lambda function performing the reduction.
* \tparam DataFetcher is a lambda function for fetching the input data.
*
* \param size is the number of elements to be reduced.
* \param reduction is a lambda function defining the reduction operation.
* \param dataFetcher is a lambda function fetching the input data.
* \param zero is the identity (neutral) element for the reduction operation, i.e. element which
* does not change the result of the reduction.
* \return result of the reduction
*
* The dataFetcher lambda function takes one argument which is index of the element to be fetched:
*
* ```
* auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
* The reduction lambda function takes two variables which are supposed to be reduced:
*
* ```
* auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/SumExample.cpp
*
* \par Output
*
* \include SumExample.out
*/
template< typename Index,
typename Result,
typename ReductionOperation,
......@@ -38,6 +85,46 @@ struct Reduction< Devices::Host >
DataFetcher& dataFetcher,
const Result& zero );
/**
* \brief Computes reduction on CPU and returns position of an element of interest.
*
* For example in case of computing minimal or maximal element in array/vector,
* the position of the element having given value can be obtained. The use of this method
* is, however, more flexible.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
* \tparam ReductionOperation is a lambda function performing the reduction.
* \tparam DataFetcher is a lambda function for fetching the input data.
*
* \param size is the number of elements to be reduced.
* \param reduction is a lambda function defining the reduction operation and managing the elements positions.
* \param dataFetcher is a lambda function fetching the input data.
* \param zero is the identity (neutral) element for the reduction operation, i.e. element which
* does not change the result of the reduction.
* \return result of the reduction in the form of a std::pair< Index, Result > structure. `pair.first`
* is the element position and `pair.second` is the reduction result.
*
* The dataFetcher lambda function takes one argument which is index of the element to be fetched:
*
* ```
* auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
* The reduction lambda function takes the positions of two elements followed by the two values which are supposed to be reduced:
*
* ```
* auto reduction = [] __cuda_callable__ ( Index& aIdx, const Index& bIdx, const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/ReductionWithArgument.cpp
*
* \par Output
*
* \include ReductionWithArgument.out
*/
template< typename Index,
typename Result,
typename ReductionOperation,
......@@ -52,6 +139,41 @@ struct Reduction< Devices::Host >
template<>
struct Reduction< Devices::Cuda >
{
/**
* \brief Computes reduction on GPU.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
* \tparam ReductionOperation is a lambda function performing the reduction.
* \tparam DataFetcher is a lambda function for fetching the input data.
*
* \param size is the number of elements to be reduced.
* \param reduction is a lambda function defining the reduction operation.
* \param dataFetcher is a lambda function fetching the input data.
* \param zero is the identity (neutral) element for the reduction operation, i.e. element which
* does not change the result of the reduction.
* \return result of the reduction
*
* The dataFetcher lambda function takes one argument which is index of the element to be fetched:
*
* ```
* auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
* The reduction lambda function takes two variables which are supposed to be reduced:
*
* ```
* auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/SumExample.cpp
*
* \par Output
*
* \include SumExample.out
*/
template< typename Index,
typename Result,
typename ReductionOperation,
......@@ -62,6 +184,46 @@ struct Reduction< Devices::Cuda >
DataFetcher& dataFetcher,
const Result& zero );
/**
* \brief Computes reduction on GPU and returns position of an element of interest.
*
* For example in case of computing minimal or maximal element in array/vector,
* the position of the element having given value can be obtained. The use of this method
* is, however, more flexible.
*
* \tparam Index is a type for indexing.
* \tparam Result is a type of the reduction result.
* \tparam ReductionOperation is a lambda function performing the reduction.
* \tparam DataFetcher is a lambda function for fetching the input data.
*
* \param size is the number of elements to be reduced.
* \param reduction is a lambda function defining the reduction operation and managing the elements positions.
* \param dataFetcher is a lambda function fetching the input data.
* \param zero is the identity (neutral) element for the reduction operation, i.e. element which
* does not change the result of the reduction.
* \return result of the reduction in the form of a std::pair< Index, Result > structure. `pair.first`
* is the element position and `pair.second` is the reduction result.
*
* The dataFetcher lambda function takes one argument which is index of the element to be fetched:
*
* ```
* auto dataFetcher1 = [=] __cuda_callable__ ( Index i ) { return ... };
* ```
*
* The reduction lambda function takes the positions of two elements followed by the two values which are supposed to be reduced:
*
* ```
* auto reduction = [] __cuda_callable__ ( Index& aIdx, const Index& bIdx, const Result& a, const Result& b ) { return ... };
* ```
*
* \par Example
*
* \include ReductionAndScan/ReductionWithArgument.cpp
*
* \par Output
*
* \include ReductionWithArgument.out
*/
template< typename Index,
typename Result,
typename ReductionOperation,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment