Loading src/TNL/Containers/Algorithms/Multireduction.hpp +45 −43 Original line number Diff line number Diff line Loading @@ -50,8 +50,9 @@ reduce( const Result zero, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && blocks >= 2 ) #pragma omp parallel if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // first thread initializes the result array #pragma omp single nowait Loading Loading @@ -105,6 +106,7 @@ reduce( const Result zero, result[ k ] = reduction( result[ k ], r[ 4 * k ] ); } } } else { #endif if( blocks > 1 ) { Loading src/TNL/Containers/Algorithms/Reduction.hpp +6 −4 Original line number Diff line number Diff line Loading @@ -53,10 +53,11 @@ reduce( const Index size, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable Result result = zero; #pragma omp parallel const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // initialize array for thread-local results Result r[ 4 ] = { zero, zero, zero, zero }; Loading Loading @@ -145,10 +146,11 @@ reduceWithArgument( const Index size, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable std::pair< Index, Result > result( -1, zero ); #pragma omp parallel const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // initialize array for thread-local results Index arg[ 4 ] = { 0, 0, 0, 0 }; Loading Loading
src/TNL/Containers/Algorithms/Multireduction.hpp +45 −43 Original line number Diff line number Diff line Loading @@ -50,8 +50,9 @@ reduce( const Result zero, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && blocks >= 2 ) #pragma omp parallel if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // first thread initializes the result array #pragma omp single nowait Loading Loading @@ -105,6 +106,7 @@ reduce( const Result zero, result[ k ] = reduction( result[ k ], r[ 4 * k ] ); } } } else { #endif if( blocks > 1 ) { Loading
src/TNL/Containers/Algorithms/Reduction.hpp +6 −4 Original line number Diff line number Diff line Loading @@ -53,10 +53,11 @@ reduce( const Index size, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable Result result = zero; #pragma omp parallel const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // initialize array for thread-local results Result r[ 4 ] = { zero, zero, zero, zero }; Loading Loading @@ -145,10 +146,11 @@ reduceWithArgument( const Index size, const int blocks = size / block_size; #ifdef HAVE_OPENMP if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable std::pair< Index, Result > result( -1, zero ); #pragma omp parallel const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) { // initialize array for thread-local results Index arg[ 4 ] = { 0, 0, 0, 0 }; Loading