Commit 41b5a51a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber

Finished tutorial on scan.

parent 0468914d
......@@ -21,6 +21,10 @@ IF( BUILD_CUDA )
ADD_CUSTOM_COMMAND( COMMAND ReductionWithArgument > ReductionWithArgument.out OUTPUT ReductionWithArgument.out )
CUDA_ADD_EXECUTABLE( ScanExample ScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out )
CUDA_ADD_EXECUTABLE( ScanExample ScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out )
CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out )
ENDIF()
IF( BUILD_CUDA )
......@@ -34,6 +38,7 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
MapReduceExample-1.out
MapReduceExample-2.out
MapReduceExample-3.out
ReductionWithArgument.out )
ScanExample.out )
ReductionWithArgument.out
ScanExample.out
ExclusiveScanExample.out )
ENDIF()
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Containers/StaticVector.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
void scan( Vector< double, Device >& v )
{
   /***
    * Binary operation used by the scan: addition of two doubles.
    */
   auto sum = [] __cuda_callable__ ( const double& lhs, const double& rhs ) -> double {
      return lhs + rhs;
   };

   /***
    * Perform the exclusive scan in place on v. The arguments are, in order:
    * the vector to scan, the two bounds ( 0 and v.getSize() ) of the interval
    * on which the scan is performed, the binary operation, and 0.0 which is
    * the zero element of the 'sum' operation ( a + 0.0 == a ).
    */
   Scan< Device, ScanType::Exclusive >::perform( v, 0, v.getSize(), sum, 0.0 );
}
int main( int argc, char* argv[] )
{
   // Number of elements in each vector used by the example.
   const int size = 10;

   /***
    * Host (CPU) part: fill a vector with ones, print it, run the exclusive
    * scan on it and print the result.
    */
   Vector< double, Devices::Host > host_v( size );
   host_v = 1.0;
   std::cout << "host_v = " << host_v << std::endl;
   scan( host_v );
   std::cout << "The exclusive prefix sum of the host vector is ";
   std::cout << host_v << "." << std::endl;

   /***
    * The same steps with a vector allocated on the GPU; compiled only when
    * HAVE_CUDA is defined.
    */
#ifdef HAVE_CUDA
   Vector< double, Devices::Cuda > cuda_v( size );
   cuda_v = 1.0;
   std::cout << "cuda_v = " << cuda_v << std::endl;
   scan( cuda_v );
   std::cout << "The exclusive prefix sum of the CUDA vector is ";
   std::cout << cuda_v << "." << std::endl;
#endif
   return EXIT_SUCCESS;
}
ExclusiveScanExample.cpp
\ No newline at end of file
......@@ -11,29 +11,19 @@ using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
void scan( const Vector< double, Device >& v )
void scan( Vector< double, Device >& v )
{
/****
* Get vector view which can be captured by lambda.
*/
auto view = v.getConstView();
/****
* The fetch function just reads elements of vector v.
*/
auto fetch = [=] __cuda_callable__ ( int i ) -> double { return view[ i ]; };
/***
* Reduction is sum of two numbers.
*/
auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; };
/***
* Finally we call the templated function Reduction and pass number of elements to reduce,
* lambdas defined above and finally value of idempotent element, zero in this case, which serve for the
* reduction initiation.
* As parameters, we pass vector on which the scan is to be performed, interval
* where the scan is performed, lambda function which is used by the scan and
* zero element (idempotent) of the 'sum' operation.
*/
Scan< Device >::perform( view, 0, view.getSize(), reduce, 0.0 );
Scan< Device >::perform( v, 0, v.getSize(), reduce, 0.0 );
}
int main( int argc, char* argv[] )
......@@ -55,7 +45,7 @@ int main( int argc, char* argv[] )
cuda_v = 1.0;
std::cout << "cuda_v = " << cuda_v << std::endl;
scan( cuda_v );
std::cout << "The prefisx sum of the CUDA vector is " << cuda_v << "." << std::endl;
std::cout << "The prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
......
......@@ -168,7 +168,7 @@ The result looks as:
## Flexible scan<a name="flexible_scan"></a>
Inclusive scan ( or prefix sum) operation turns a sequence \f$a_1, \ldots, a_n\f$ into a sequence \f$s_1, \ldots, s_n\f$ defined as
Inclusive scan (or prefix sum) operation turns a sequence \f$a_1, \ldots, a_n\f$ into a sequence \f$s_1, \ldots, s_n\f$ defined as
\f[
s_i = \sum_{j=1}^i a_j.
......@@ -180,10 +180,26 @@ Exclusive scan (or prefix sum) is defined as
\sigma_i = \sum_{j=1}^{i-1} a_j.
\f]
Both kinds of [scans](https://en.wikipedia.org/wiki/Prefix_sum)) are usually applied only on sumation, however product or logical operations could be handy as well. In TNL, prefix sum is implemented in simillar way as reduction and so it can be easily modified by lambda functions. The following example shows how it works:
Both kinds of [scan](https://en.wikipedia.org/wiki/Prefix_sum) are usually applied only to summation; however, product or logical operations can be handy as well. In TNL, prefix sum is implemented in a similar way to reduction, so it can be easily customized with lambda functions. The following example shows how it works:
\include ScanExample.cpp
Scan does not use a `fetch` function because the scan must be performed on a vector (the first parameter we pass to the scan). Its complexity is also higher compared to reduction. Thus, if one needs to perform some operation on the vector elements before the scan, this can be done explicitly and it will not affect the performance significantly. On the other hand, the scan function takes the interval of vector elements on which the scan is performed as its second and third arguments. The next argument is the operation to be performed by the scan, and the last parameter is the idempotent ("zero") element of the operation.
The result looks as:
\include ScanExample.out
Exclusive scan works the same way; we just need to select it with the second template parameter, which is set to `ScanType::Exclusive`. The call of the scan then looks as follows:
```
Scan< Device, ScanType::Exclusive >::perform( v, 0, v.getSize(), reduce, 0.0 );
```
The complete example looks as follows:
\include ExclusiveScanExample.cpp
And the result looks as:
\include ExclusiveScanExample.out
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment