Skip to content
Snippets Groups Projects
Commit 2e26d884 authored by Tomáš Oberhuber, committed by Tomáš Oberhuber
Browse files

Optimization of ParallelFor on CPU.

parent 075740ec
No related branches found
No related tags found
1 merge request: !20 Traversers optimizations
......@@ -37,10 +37,21 @@ struct ParallelFor
static void exec( Index start, Index end, Function f, FunctionArgs... args )
{
#ifdef HAVE_OPENMP
#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
#endif
// Benchmarks show that this is significantly faster compared
// to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )'
if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
{
#pragma omp parallel for
for( Index i = start; i < end; i++ )
f( i, args... );
}
else
for( Index i = start; i < end; i++ )
f( i, args... );
#else
for( Index i = start; i < end; i++ )
f( i, args... );
#endif
}
};
......@@ -53,11 +64,24 @@ struct ParallelFor2D
static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args )
{
#ifdef HAVE_OPENMP
#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
#endif
// Benchmarks show that this is significantly faster compared
// to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )'
if( TNL::Devices::Host::isOMPEnabled() )
{
#pragma omp parallel for
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
f( i, j, args... );
}
else
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
f( i, j, args... );
#else
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
f( i, j, args... );
for( Index j = startY; j < endY; j++ )
f( i, j, args... );
#endif
}
};
......@@ -70,12 +94,27 @@ struct ParallelFor3D
static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args )
{
#ifdef HAVE_OPENMP
#pragma omp parallel for collapse(2) if( TNL::Devices::Host::isOMPEnabled() )
#endif
// Benchmarks show that this is significantly faster compared
// to '#pragma omp parallel for collapse(2) if( TNL::Devices::Host::isOMPEnabled() )'
if( TNL::Devices::Host::isOMPEnabled() )
{
#pragma omp parallel for collapse(2)
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
for( Index k = startZ; k < endZ; k++ )
f( i, j, k, args... );
for( Index j = startY; j < endY; j++ )
for( Index k = startZ; k < endZ; k++ )
f( i, j, k, args... );
}
else
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
for( Index k = startZ; k < endZ; k++ )
f( i, j, k, args... );
#else
for( Index i = startX; i < endX; i++ )
for( Index j = startY; j < endY; j++ )
for( Index k = startZ; k < endZ; k++ )
f( i, j, k, args... );
#endif
}
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment