Commit 291480a1 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber

Writing segmented scan tutorial.

parent 263bebcc
......@@ -23,6 +23,8 @@ IF( BUILD_CUDA )
ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out )
CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out )
CUDA_ADD_EXECUTABLE( SegmentedScanExample SegmentedScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND SegmentedScanExample > SegmentedScanExample.out OUTPUT SegmentedScanExample.out )
ENDIF()
IF( BUILD_CUDA )
......@@ -36,7 +38,8 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
MapReduceExample-1.out
MapReduceExample-2.out
MapReduceExample-3.out
ReductionWithArgument.out
ScanExample.out
ExclusiveScanExample.out )
ReductionWithArgument.out
ScanExample.out
ExclusiveScanExample.out
SegmentedScanExample.out )
ENDIF()
......@@ -11,7 +11,7 @@ using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
void scan( Vector< double, Device >& v )
void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
{
/***
* The reduction operation is the sum of two numbers.
......@@ -23,29 +23,31 @@ void scan( Vector< double, Device >& v )
* where the scan is performed, lambda function which is used by the scan and
* zero element (identity, i.e. neutral element) of the 'sum' operation.
*/
Scan< Device >::perform( v, 0, v.getSize(), reduce, 0.0 );
SegmentedScan< Device >::perform( v, flags, 0, v.getSize(), reduce, 0.0 );
}
int main( int argc, char* argv[] )
{
/***
* Firstly, test the prefix sum with vectors allocated on CPU.
* Firstly, test the segmented prefix sum with vectors allocated on CPU.
*/
Vector< double, Devices::Host > host_v( 10 );
host_v = 1.0;
std::cout << "host_v = " << host_v << std::endl;
scan( host_v );
std::cout << "The prefix sum of the host vector is " << host_v << "." << std::endl;
Vector< bool, Devices::Host > host_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
Vector< double, Devices::Host > host_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
std::cout << "host_flags = " << host_flags << std::endl;
std::cout << "host_v = " << host_v << std::endl;
segmentedScan( host_v, host_flags );
std::cout << "The segmented prefix sum of the host vector is " << host_v << "." << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( 10 );
cuda_v = 1.0;
std::cout << "cuda_v = " << cuda_v << std::endl;
scan( cuda_v );
std::cout << "The prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
//Vector< bool, Devices::Cuda > cuda_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
//Vector< double, Devices::Cuda > cuda_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
//std::cout << "cuda_flags = " << cuda_flags << std::endl;
//std::cout << "cuda_v = " << cuda_v << std::endl;
//segmentedScan( cuda_v, cuda_flags );
//std::cout << "The segmented prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
......
......@@ -251,4 +251,10 @@ In addition to common scan, we need to encode the segments of the input sequence
[1,0,0,1,0,0,0,1,0,1,0,0, 0, 0]
[1,3,5,2,4,6,9,3,5,3,6,9,12,15]
```
**Note: Segmented scan is not implemented for CUDA yet.**
\include SegmentedScanExample.cpp
The result reads as:
\include SegmentedScanExample.out
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment