Skip to content
Snippets Groups Projects
Commit 9c7dab00 authored by Tomáš Oberhuber
Browse files

Added OpenMP parallelization to Merson solver.

parent 58208d3b
No related branches found
No related tags found
No related merge requests found
...@@ -108,8 +108,9 @@ then ...@@ -108,8 +108,9 @@ then
fi fi
PYTHON_TEST="`python src/Tools/python-path-test.py 2> /dev/null`"
if test x`python src/Tools/python-path-test.py 2> /dev/null` != xOK; echo "xxxxx ${PYTHON_TEST} xxxxx\n"
if test PYTHON_TEST != "xOK";
then then
source ${BUILD_PREFIX}/python-version source ${BUILD_PREFIX}/python-version
echo "" echo ""
......
...@@ -74,11 +74,11 @@ int Host::getThreadIdx() ...@@ -74,11 +74,11 @@ int Host::getThreadIdx()
void Host::configSetup( Config::ConfigDescription& config, const String& prefix ) void Host::configSetup( Config::ConfigDescription& config, const String& prefix )
{ {
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); config.addEntry< bool >( prefix + "openmp-enabled", "Enable support of OpenMP.", true );
config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); config.addEntry< int >( prefix + "openmp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
#else #else
config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); config.addEntry< bool >( prefix + "openmp-enabled", "Enable support of OpenMP (not supported on this system).", false );
config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); config.addEntry< int >( prefix + "openmp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
#endif #endif
} }
...@@ -86,11 +86,11 @@ void Host::configSetup( Config::ConfigDescription& config, const String& prefix ...@@ -86,11 +86,11 @@ void Host::configSetup( Config::ConfigDescription& config, const String& prefix
bool Host::setup( const Config::ParameterContainer& parameters, bool Host::setup( const Config::ParameterContainer& parameters,
const String& prefix ) const String& prefix )
{ {
if( parameters.getParameter< bool >( prefix + "omp-enabled" ) ) if( parameters.getParameter< bool >( prefix + "openmp-enabled" ) )
enableOMP(); enableOMP();
else else
disableOMP(); disableOMP();
setMaxThreadsCount( parameters.getParameter< int >( prefix + "omp-max-threads" ) ); setMaxThreadsCount( parameters.getParameter< int >( prefix + "openmp-max-threads" ) );
return true; return true;
} }
......
...@@ -70,6 +70,8 @@ class Merson : public ExplicitSolver< Problem > ...@@ -70,6 +70,8 @@ class Merson : public ExplicitSolver< Problem >
* This controls the accuracy of the solver * This controls the accuracy of the solver
*/ */
RealType adaptivity; RealType adaptivity;
Containers::Vector< RealType, DeviceType, IndexType > openMPErrorEstimateBuffer;
}; };
} // namespace ODE } // namespace ODE
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include <TNL/Devices/Cuda.h> #include <TNL/Devices/Cuda.h>
#include <TNL/Config/ParameterContainer.h> #include <TNL/Config/ParameterContainer.h>
#include "Merson.h"
namespace TNL { namespace TNL {
namespace Solvers { namespace Solvers {
namespace ODE { namespace ODE {
...@@ -85,6 +87,10 @@ template< typename Problem > ...@@ -85,6 +87,10 @@ template< typename Problem >
Merson< Problem > :: Merson() Merson< Problem > :: Merson()
: adaptivity( 0.00001 ) : adaptivity( 0.00001 )
{ {
if( std::is_same< DeviceType, Devices::Host >::value )
{
this->openMPErrorEstimateBuffer.setSize( std::max( 1, Devices::Host::getMaxThreadsCount() ) );
}
}; };
template< typename Problem > template< typename Problem >
...@@ -378,16 +384,23 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t ...@@ -378,16 +384,23 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
RealType eps( 0.0 ), maxEps( 0.0 ); RealType eps( 0.0 ), maxEps( 0.0 );
if( std::is_same< DeviceType, Devices::Host >::value ) if( std::is_same< DeviceType, Devices::Host >::value )
{ {
// TODO: implement OpenMP support this->openMPErrorEstimateBuffer.setValue( 0.0 );
for( IndexType i = 0; i < size; i ++ ) #pragma omp parallel if( Devices::Host::isOMPEnabled() )
{ {
RealType err = ( RealType ) ( tau / 3.0 * RealType localEps( 0.0 );
abs( 0.2 * _k1[ i ] + #pragma omp for
-0.9 * _k3[ i ] + for( IndexType i = 0; i < size; i ++ )
0.8 * _k4[ i ] + {
-0.1 * _k5[ i ] ) ); RealType err = ( RealType ) ( tau / 3.0 *
eps = max( eps, err ); abs( 0.2 * _k1[ i ] +
-0.9 * _k3[ i ] +
0.8 * _k4[ i ] +
-0.1 * _k5[ i ] ) );
localEps = max( localEps, err );
}
this->openMPErrorEstimateBuffer[ Devices::Host::getThreadIdx() ] = localEps;
} }
eps = this->openMPErrorEstimateBuffer.max();
} }
if( std::is_same< DeviceType, Devices::Cuda >::value ) if( std::is_same< DeviceType, Devices::Cuda >::value )
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment