Commit 505d0b68 authored by Jakub Klinkovský
Browse files

Optimized OpenMP thread counts for reduction and multireduction

parent 32c69a11
Loading
Loading
Loading
Loading
+45 −43
Original line number Diff line number Diff line
@@ -50,8 +50,9 @@ reduce( const Result zero,
   const int blocks = size / block_size;

#ifdef HAVE_OPENMP
-   if( TNL::Devices::Host::isOMPEnabled() && blocks >= 2 )
-#pragma omp parallel
+   if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
+      const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() );
+#pragma omp parallel num_threads(threads)
      {
         // first thread initializes the result array
         #pragma omp single nowait
@@ -105,6 +106,7 @@ reduce( const Result zero,
               result[ k ] = reduction( result[ k ], r[ 4 * k ] );
         }
      }
+   }
   else {
#endif
      if( blocks > 1 ) {
+6 −4
Original line number Diff line number Diff line
@@ -53,10 +53,11 @@ reduce( const Index size,
   const int blocks = size / block_size;

#ifdef HAVE_OPENMP
-   if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) {
+   if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
      // global result variable
      Result result = zero;
-#pragma omp parallel
+      const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() );
+#pragma omp parallel num_threads(threads)
      {
         // initialize array for thread-local results
         Result r[ 4 ] = { zero, zero, zero, zero  };
@@ -145,10 +146,11 @@ reduceWithArgument( const Index size,
   const int blocks = size / block_size;

#ifdef HAVE_OPENMP
-   if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) {
+   if( Devices::Host::isOMPEnabled() && blocks >= 2 ) {
      // global result variable
      std::pair< Index, Result > result( -1, zero );
-#pragma omp parallel
+      const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() );
+#pragma omp parallel num_threads(threads)
      {
         // initialize array for thread-local results
         Index arg[ 4 ] = { 0, 0, 0, 0 };