Commit 552e90c4 authored by Jakub Klinkovský

Fixed distributed scan without OpenMP

parent bb04a590
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <TNL/Assert.h> #include <TNL/Assert.h>
#include <TNL/Containers/Array.h> #include <TNL/Containers/Array.h>
#include <TNL/Containers/StaticArray.h>
#include <TNL/Containers/Algorithms/CudaScanKernel.h> #include <TNL/Containers/Algorithms/CudaScanKernel.h>
#include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Exceptions/NotImplementedError.h> #include <TNL/Exceptions/NotImplementedError.h>
...@@ -98,9 +99,15 @@ performFirstPhase( Vector& v, ...@@ -98,9 +99,15 @@ performFirstPhase( Vector& v,
// block_sums now contains shift values for each block - to be used in the second phase // block_sums now contains shift values for each block - to be used in the second phase
return block_sums; return block_sums;
#else #else
// FIXME: StaticArray does not have getElement() which is used in DistributedScan
// return StaticArray< 1, RealType > block_sums;
Array< RealType, Devices::Host > block_sums( 1 );
block_sums[ 0 ] = zero;
if( Type == ScanType::Inclusive ) { if( Type == ScanType::Inclusive ) {
for( IndexType i = begin + 1; i < end; i++ ) for( IndexType i = begin + 1; i < end; i++ )
v[ i ] = reduction( v[ i ], v[ i - 1 ] ); v[ i ] = reduction( v[ i ], v[ i - 1 ] );
block_sums[ 0 ] = v[ end - 1 ];
} }
else // Exclusive prefix sum else // Exclusive prefix sum
{ {
...@@ -110,9 +117,10 @@ performFirstPhase( Vector& v, ...@@ -110,9 +117,10 @@ performFirstPhase( Vector& v,
v[ i ] = aux; v[ i ] = aux;
aux = reduction( aux, x ); aux = reduction( aux, x );
} }
block_sums[ 0 ] = aux;
} }
return 0; return block_sums;
#endif #endif
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment