Commit 291480a1 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Writing segmented scan tutorial.

parent 263bebcc
Loading
Loading
Loading
Loading
+6 −3
Original line number Diff line number Diff line
@@ -23,6 +23,8 @@ IF( BUILD_CUDA )
   ADD_CUSTOM_COMMAND( COMMAND ScanExample > ScanExample.out OUTPUT ScanExample.out )
   CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu )
   ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out )
   CUDA_ADD_EXECUTABLE( SegmentedScanExample SegmentedScanExample.cu )
   ADD_CUSTOM_COMMAND( COMMAND SegmentedScanExample > SegmentedScanExample.out OUTPUT SegmentedScanExample.out )
ENDIF()

IF( BUILD_CUDA )
@@ -38,5 +40,6 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
   MapReduceExample-3.out
   ReductionWithArgument.out
   ScanExample.out
   ExclusiveScanExample.out )
   ExclusiveScanExample.out
   SegmentedScanExample.out )
ENDIF()
+15 −13
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;

template< typename Device >
void scan( Vector< double, Device >& v )
void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
{
   /***
    * Reduction is sum of two numbers.
@@ -23,29 +23,31 @@ void scan( Vector< double, Device >& v )
    * where the scan is performed, lambda function which is used by the scan and
    * zero element (identity) of the 'sum' operation.
    */
   Scan< Device >::perform( v, 0, v.getSize(), reduce, 0.0 );
   SegmentedScan< Device >::perform( v, flags, 0, v.getSize(), reduce, 0.0 );
}

int main( int argc, char* argv[] )
{
   /***
    * Firstly, test the prefix sum with vectors allocated on CPU.
    * Firstly, test the segmented prefix sum with vectors allocated on CPU.
    */
   Vector< double, Devices::Host > host_v( 10 );
   host_v = 1.0;
   Vector< bool, Devices::Host > host_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
   Vector< double, Devices::Host > host_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
   std::cout << "host_flags = " << host_flags << std::endl;
   std::cout << "host_v     = " << host_v << std::endl;
   scan( host_v );
   std::cout << "The prefix sum of the host vector is " << host_v << "." << std::endl;
   segmentedScan( host_v, host_flags );
   std::cout << "The segmented prefix sum of the host vector is " << host_v << "." << std::endl;

   /***
    * And then also on GPU.
    */
#ifdef HAVE_CUDA
   Vector< double, Devices::Cuda > cuda_v( 10 );
   cuda_v = 1.0;
   std::cout << "cuda_v = " << cuda_v << std::endl;
   scan( cuda_v );
   std::cout << "The prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
   //Vector< bool, Devices::Cuda > cuda_flags{ 1,0,0,1,0,0,0,1,0,1,0,0, 0, 0 };
   //Vector< double, Devices::Cuda > cuda_v { 1,3,5,2,4,6,9,3,5,3,6,9,12,15 };
   //std::cout << "cuda_flags = " << cuda_flags << std::endl;
   //std::cout << "cuda_v     = " << cuda_v << std::endl;
   //segmentedScan( cuda_v, cuda_flags );
   //std::cout << "The segmented prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
#endif
   return EXIT_SUCCESS;
}
+6 −0
Original line number Diff line number Diff line
@@ -251,4 +251,10 @@ In addition to common scan, we need to encode the segments of the input sequence
[1,0,0,1,0,0,0,1,0,1,0,0, 0, 0]
[1,3,5,2,4,6,9,3,5,3,6,9,12,15]
```
**Note: Segmented scan is not implemented for CUDA yet.**

\include SegmentedScanExample.cpp

The result reads as:

\include SegmentedScanExample.out