Skip to content
Snippets Groups Projects
Commit 4f1dc3af authored by Jakub Klinkovský
Browse files

Refactored and extended tests for scan and distributed scan

parent 4467323a
No related branches found
No related tags found
1 merge request: !100 "Scan refactoring"
......@@ -373,6 +373,7 @@ struct CudaScanKernelLauncher
// Assigns the result of Cuda::getMaxGridSize() to the value referenced by
// maxGridSize() and sets the value referenced by gridsCount() to -1.
static void resetMaxGridSize()
{
// Overwrite the stored limit with whatever Cuda::getMaxGridSize() reports.
maxGridSize() = Cuda::getMaxGridSize();
// NOTE(review): -1 looks like a "not yet computed" sentinel for the grid
// counter -- confirm against the code that writes gridsCount().
gridsCount() = -1;
}
static int& gridsCount()
......
......@@ -107,6 +107,9 @@ perform( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin )
return;
const IndexType size = end - begin;
const int max_threads = Devices::Host::getMaxThreadsCount();
const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
......@@ -157,6 +160,12 @@ performFirstPhase( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin ) {
Containers::Array< typename Vector::ValueType, Devices::Sequential > block_results( 1 );
block_results.setValue( zero );
return block_results;
}
const IndexType size = end - begin;
const int max_threads = Devices::Host::getMaxThreadsCount();
const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
......@@ -204,6 +213,9 @@ performSecondPhase( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin )
return;
const IndexType size = end - begin;
const int max_threads = Devices::Host::getMaxThreadsCount();
const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
......@@ -241,6 +253,9 @@ perform( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin )
return;
detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::perform(
end - begin,
&v.getData()[ begin ], // input
......@@ -267,6 +282,12 @@ performFirstPhase( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin ) {
Containers::Array< typename Vector::ValueType, Devices::Cuda > block_results( 1 );
block_results.setValue( zero );
return block_results;
}
return detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::performFirstPhase(
end - begin,
&v.getData()[ begin ], // input
......@@ -295,6 +316,9 @@ performSecondPhase( Vector& v,
using ValueType = typename Vector::ValueType;
using IndexType = typename Vector::IndexType;
if( end <= begin )
return;
detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::performSecondPhase(
end - begin,
&v.getData()[ begin ], // output
......
......@@ -64,6 +64,8 @@ inplaceInclusiveScan( Array& array,
Reduction&& reduction,
typename Array::ValueType zero )
{
TNL_ASSERT_EQ( reduction( zero, zero ), zero,
"zero is not an idempotent value of the reduction operation" );
using Scan = detail::Scan< typename Array::DeviceType, detail::ScanType::Inclusive >;
Scan::perform( array, begin, end, std::forward< Reduction >( reduction ), zero );
}
......@@ -134,6 +136,8 @@ inplaceExclusiveScan( Array& array,
Reduction&& reduction,
typename Array::ValueType zero )
{
TNL_ASSERT_EQ( reduction( zero, zero ), zero,
"zero is not an idempotent value of the reduction operation" );
using Scan = detail::Scan< typename Array::DeviceType, detail::ScanType::Exclusive >;
Scan::perform( array, begin, end, std::forward< Reduction >( reduction ), zero );
}
......
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment