Skip to content
Snippets Groups Projects
Commit 291480a1 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Writting segmented scan tutorial.

parent 263bebcc
No related branches found
No related tags found
1 merge request!41Tutorials
...@@ -23,6 +23,8 @@ IF( BUILD_CUDA ) ...@@ -23,6 +23,8 @@ IF( BUILD_CUDA )
ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out ) ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out )
CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu ) CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out ) ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out )
CUDA_ADD_EXECUTABLE( SegmentedScanExample SegmentedScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND SegmentedScanExample > SegmentedScanExample.out OUTPUT SegmentedScanExample.out )
ENDIF() ENDIF()
IF( BUILD_CUDA ) IF( BUILD_CUDA )
...@@ -36,7 +38,8 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS ...@@ -36,7 +38,8 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
MapReduceExample-1.out MapReduceExample-1.out
MapReduceExample-2.out MapReduceExample-2.out
MapReduceExample-3.out MapReduceExample-3.out
ReductionWithArgument.out ReductionWithArgument.out
ScanExample.out ScanExample.out
ExclusiveScanExample.out ) ExclusiveScanExample.out
SegmentedScanExample.out )
ENDIF() ENDIF()
...@@ -11,7 +11,7 @@ using namespace TNL::Containers; ...@@ -11,7 +11,7 @@ using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms; using namespace TNL::Containers::Algorithms;
template< typename Device > template< typename Device >
void scan( Vector< double, Device >& v ) void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
{ {
/*** /***
* Reduction is sum of two numbers. * Reduction is sum of two numbers.
...@@ -23,29 +23,31 @@ void scan( Vector< double, Device >& v ) ...@@ -23,29 +23,31 @@ void scan( Vector< double, Device >& v )
* where the scan is performed, lambda function which is used by the scan and * where the scan is performed, lambda function which is used by the scan and
* zero element (idempotent) of the 'sum' operation. * zero element (idempotent) of the 'sum' operation.
*/ */
Scan< Device >::perform( v, 0, v.getSize(), reduce, 0.0 ); SegmentedScan< Device >::perform( v, flags, 0, v.getSize(), reduce, 0.0 );
} }
int main( int argc, char* argv[] ) int main( int argc, char* argv[] )
{ {
/*** /***
* Firstly, test the prefix sum with vectors allocated on CPU. * Firstly, test the segmented prefix sum with vectors allocated on CPU.
*/ */
Vector< double, Devices::Host > host_v( 10 ); Vector< bool, Devices::Host > host_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
host_v = 1.0; Vector< double, Devices::Host > host_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
std::cout << "host_v = " << host_v << std::endl; std::cout << "host_flags = " << host_flags << std::endl;
scan( host_v ); std::cout << "host_v = " << host_v << std::endl;
std::cout << "The prefix sum of the host vector is " << host_v << "." << std::endl; segmentedScan( host_v, host_flags );
std::cout << "The segmented prefix sum of the host vector is " << host_v << "." << std::endl;
/*** /***
* And then also on GPU. * And then also on GPU.
*/ */
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( 10 ); //Vector< bool, Devices::Cuda > cuda_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
cuda_v = 1.0; //Vector< double, Devices::Cuda > cuda_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
std::cout << "cuda_v = " << cuda_v << std::endl; //std::cout << "cuda_flags = " << cuda_flags << std::endl;
scan( cuda_v ); //std::cout << "cuda_v = " << cuda_v << std::endl;
std::cout << "The prefix sum of the CUDA vector is " << cuda_v << "." << std::endl; //segmentedScan( cuda_v, cuda_flags );
//std::cout << "The segmnted prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
#endif #endif
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
......
...@@ -251,4 +251,10 @@ In addition to common scan, we need to encode the segments of the input sequence ...@@ -251,4 +251,10 @@ In addition to common scan, we need to encode the segments of the input sequence
[1,0,0,1,0,0,0,1,0,1,0,0, 0, 0] [1,0,0,1,0,0,0,1,0,1,0,0, 0, 0]
[1,3,5,2,4,6,9,3,5,3,6,9,12,15] [1,3,5,2,4,6,9,3,5,3,6,9,12,15]
``` ```
**Note: Segmented scan is not implemented for CUDA yet.**
\include SegmentedScanExample.cpp
The result reads as:
\include SegmentedScanExample.out
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment