src/TNL/ParallelFor.h +50 −11

@@ -37,10 +37,21 @@ struct ParallelFor
    static void exec( Index start, Index end, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )'
+      if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
+      {
+#pragma omp parallel for
       for( Index i = start; i < end; i++ )
          f( i, args... );
+      }
+      else
+         for( Index i = start; i < end; i++ )
+            f( i, args... );
+#else
+      for( Index i = start; i < end; i++ )
+         f( i, args... );
+#endif
    }
 };

@@ -53,11 +64,24 @@ struct ParallelFor2D
    static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
+      if( TNL::Devices::Host::isOMPEnabled() )
+      {
+#pragma omp parallel for
       for( Index i = startX; i < endX; i++ )
          for( Index j = startY; j < endY; j++ )
             f( i, j, args... );
+      }
+      else
+         for( Index i = startX; i < endX; i++ )
+            for( Index j = startY; j < endY; j++ )
+               f( i, j, args... );
+#else
+      for( Index i = startX; i < endX; i++ )
+         for( Index j = startY; j < endY; j++ )
+            f( i, j, args... );
+#endif
    }
 };

@@ -70,12 +94,27 @@ struct ParallelFor3D
    static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for collapse(2) if( TNL::Devices::Host::isOMPEnabled() )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
+      if( TNL::Devices::Host::isOMPEnabled() )
+      {
+#pragma omp parallel for collapse(2)
       for( Index i = startX; i < endX; i++ )
          for( Index j = startY; j < endY; j++ )
             for( Index k = startZ; k < endZ; k++ )
                f( i, j, k, args... );
+      }
+      else
+         for( Index i = startX; i < endX; i++ )
+            for( Index j = startY; j < endY; j++ )
+               for( Index k = startZ; k < endZ; k++ )
+                  f( i, j, k, args... );
+#else
+      for( Index i = startX; i < endX; i++ )
+         for( Index j = startY; j < endY; j++ )
+            for( Index k = startZ; k < endZ; k++ )
+               f( i, j, k, args... );
+#endif
    }
 };
src/TNL/ParallelFor.h +50 −11

@@ -37,10 +37,21 @@ struct ParallelFor
    static void exec( Index start, Index end, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )'
+      if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
+      {
+#pragma omp parallel for
       for( Index i = start; i < end; i++ )
          f( i, args... );
+      }
+      else
+         for( Index i = start; i < end; i++ )
+            f( i, args... );
+#else
+      for( Index i = start; i < end; i++ )
+         f( i, args... );
+#endif
    }
 };

@@ -53,11 +64,24 @@ struct ParallelFor2D
    static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
+      if( TNL::Devices::Host::isOMPEnabled() )
+      {
+#pragma omp parallel for
       for( Index i = startX; i < endX; i++ )
          for( Index j = startY; j < endY; j++ )
             f( i, j, args... );
+      }
+      else
+         for( Index i = startX; i < endX; i++ )
+            for( Index j = startY; j < endY; j++ )
+               f( i, j, args... );
+#else
+      for( Index i = startX; i < endX; i++ )
+         for( Index j = startY; j < endY; j++ )
+            f( i, j, args... );
+#endif
    }
 };

@@ -70,12 +94,27 @@ struct ParallelFor3D
    static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for collapse(2) if( TNL::Devices::Host::isOMPEnabled() )
-#endif
+      // Benchmarks show that this is significantly faster compared
+      // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
+      if( TNL::Devices::Host::isOMPEnabled() )
+      {
+#pragma omp parallel for collapse(2)
       for( Index i = startX; i < endX; i++ )
          for( Index j = startY; j < endY; j++ )
             for( Index k = startZ; k < endZ; k++ )
                f( i, j, k, args... );
+      }
+      else
+         for( Index i = startX; i < endX; i++ )
+            for( Index j = startY; j < endY; j++ )
+               for( Index k = startZ; k < endZ; k++ )
+                  f( i, j, k, args... );
+#else
+      for( Index i = startX; i < endX; i++ )
+         for( Index j = startY; j < endY; j++ )
+            for( Index k = startZ; k < endZ; k++ )
+               f( i, j, k, args... );
+#endif
    }
 };