Commit 4f1dc3af authored by Jakub Klinkovský
Browse files

Refactored and extended tests for scan and distributed scan

parent 4467323a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -373,6 +373,7 @@ struct CudaScanKernelLauncher
   // Resets the launcher's grid bookkeeping: stores the value returned by
   // Cuda::getMaxGridSize() as the maximum grid size and sets the grids
   // counter to -1.
   static void resetMaxGridSize()
   {
      // Overwrite the stored limit with the value reported by Cuda::getMaxGridSize().
      maxGridSize() = Cuda::getMaxGridSize();
      // NOTE(review): -1 is presumably a sentinel meaning "no grids have been
      // counted since the reset" -- confirm against the code that updates
      // gridsCount().
      gridsCount() = -1;
   }

   static int& gridsCount()
+24 −0
Original line number Diff line number Diff line
@@ -107,6 +107,9 @@ perform( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin )
      return;

   const IndexType size = end - begin;
   const int max_threads = Devices::Host::getMaxThreadsCount();
   const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
@@ -157,6 +160,12 @@ performFirstPhase( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin ) {
      Containers::Array< typename Vector::ValueType, Devices::Sequential > block_results( 1 );
      block_results.setValue( zero );
      return block_results;
   }

   const IndexType size = end - begin;
   const int max_threads = Devices::Host::getMaxThreadsCount();
   const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
@@ -204,6 +213,9 @@ performSecondPhase( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin )
      return;

   const IndexType size = end - begin;
   const int max_threads = Devices::Host::getMaxThreadsCount();
   const IndexType block_size = TNL::max( 1024, TNL::roundUpDivision( size, max_threads ) );
@@ -241,6 +253,9 @@ perform( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin )
      return;

   detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::perform(
      end - begin,
      &v.getData()[ begin ],  // input
@@ -267,6 +282,12 @@ performFirstPhase( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin ) {
      Containers::Array< typename Vector::ValueType, Devices::Cuda > block_results( 1 );
      block_results.setValue( zero );
      return block_results;
   }

   return detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::performFirstPhase(
      end - begin,
      &v.getData()[ begin ],  // input
@@ -295,6 +316,9 @@ performSecondPhase( Vector& v,
   using ValueType = typename Vector::ValueType;
   using IndexType = typename Vector::IndexType;

   if( end <= begin )
      return;

   detail::CudaScanKernelLauncher< Type, ValueType, IndexType >::performSecondPhase(
      end - begin,
      &v.getData()[ begin ],  // output
+4 −0
Original line number Diff line number Diff line
@@ -64,6 +64,8 @@ inplaceInclusiveScan( Array& array,
                      Reduction&& reduction,
                      typename Array::ValueType zero )
{
   TNL_ASSERT_EQ( reduction( zero, zero ), zero,
                  "zero is not an idempotent value of the reduction operation" );
   using Scan = detail::Scan< typename Array::DeviceType, detail::ScanType::Inclusive >;
   Scan::perform( array, begin, end, std::forward< Reduction >( reduction ), zero );
}
@@ -134,6 +136,8 @@ inplaceExclusiveScan( Array& array,
                      Reduction&& reduction,
                      typename Array::ValueType zero )
{
   TNL_ASSERT_EQ( reduction( zero, zero ), zero,
                  "zero is not an idempotent value of the reduction operation" );
   using Scan = detail::Scan< typename Array::DeviceType, detail::ScanType::Exclusive >;
   Scan::perform( array, begin, end, std::forward< Reduction >( reduction ), zero );
}
+375 −247

File changed.

Preview size limit exceeded, changes collapsed.

+367 −239

File changed.

Preview size limit exceeded, changes collapsed.