From 9c7dab00ad20c77d288e304a341e2c5c84619757 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 13 Sep 2016 16:41:49 +0200
Subject: [PATCH] Added OpenMP paralelization to Merson solver.

---
 install                           |  5 +++--
 src/TNL/Devices/Host.cpp          | 12 ++++++------
 src/TNL/Solvers/ODE/Merson.h      |  2 ++
 src/TNL/Solvers/ODE/Merson_impl.h | 29 +++++++++++++++++++++--------
 4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/install b/install
index a03def2681..a48d882e79 100755
--- a/install
+++ b/install
@@ -108,8 +108,9 @@ then
 fi
 
 
-
-if test x`python src/Tools/python-path-test.py 2> /dev/null` != xOK;
+PYTHON_TEST="`python src/Tools/python-path-test.py 2> /dev/null`"
+echo "xxxxx ${PYTHON_TEST} xxxxx\n"
+if test PYTHON_TEST != "xOK";
 then
     source ${BUILD_PREFIX}/python-version    
     echo ""
diff --git a/src/TNL/Devices/Host.cpp b/src/TNL/Devices/Host.cpp
index 4862262f2e..f46678c2d7 100644
--- a/src/TNL/Devices/Host.cpp
+++ b/src/TNL/Devices/Host.cpp
@@ -74,11 +74,11 @@ int Host::getThreadIdx()
 void Host::configSetup( Config::ConfigDescription& config, const String& prefix )
 {
 #ifdef HAVE_OPENMP
-   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true );
-   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
+   config.addEntry< bool >( prefix + "openmp-enabled", "Enable support of OpenMP.", true );
+   config.addEntry<  int >( prefix + "openmp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
 #else
-   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false );
-   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
+   config.addEntry< bool >( prefix + "openmp-enabled", "Enable support of OpenMP (not supported on this system).", false );
+   config.addEntry<  int >( prefix + "openmp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
 #endif
  
 }
@@ -86,11 +86,11 @@ void Host::configSetup( Config::ConfigDescription& config, const String& prefix
 bool Host::setup( const Config::ParameterContainer& parameters,
                   const String& prefix )
 {
-   if( parameters.getParameter< bool >( prefix + "omp-enabled" ) )
+   if( parameters.getParameter< bool >( prefix + "openmp-enabled" ) )
       enableOMP();
    else
       disableOMP();
-   setMaxThreadsCount( parameters.getParameter< int >( prefix + "omp-max-threads" ) );
+   setMaxThreadsCount( parameters.getParameter< int >( prefix + "openmp-max-threads" ) );
    return true;
 }
 
diff --git a/src/TNL/Solvers/ODE/Merson.h b/src/TNL/Solvers/ODE/Merson.h
index 88353cd054..b4570ce2e9 100644
--- a/src/TNL/Solvers/ODE/Merson.h
+++ b/src/TNL/Solvers/ODE/Merson.h
@@ -70,6 +70,8 @@ class Merson : public ExplicitSolver< Problem >
     * This controls the accuracy of the solver
     */
    RealType adaptivity;
+   
+   Containers::Vector< RealType, DeviceType, IndexType > openMPErrorEstimateBuffer;
 };
 
 } // namespace ODE
diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h
index dc9ab75886..854f976b1b 100644
--- a/src/TNL/Solvers/ODE/Merson_impl.h
+++ b/src/TNL/Solvers/ODE/Merson_impl.h
@@ -14,6 +14,8 @@
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Config/ParameterContainer.h>
 
+#include "Merson.h"
+
 namespace TNL {
 namespace Solvers {
 namespace ODE {   
@@ -85,6 +87,10 @@ template< typename Problem >
 Merson< Problem > :: Merson()
 : adaptivity( 0.00001 )
 {
+   if( std::is_same< DeviceType, Devices::Host >::value )
+   {
+      this->openMPErrorEstimateBuffer.setSize( std::max( 1, Devices::Host::getMaxThreadsCount() ) );
+   }
 };
 
 template< typename Problem >
@@ -378,16 +384,23 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
    RealType eps( 0.0 ), maxEps( 0.0 );
    if( std::is_same< DeviceType, Devices::Host >::value )
    {
-      // TODO: implement OpenMP support
-      for( IndexType i = 0; i < size; i ++  )
+      this->openMPErrorEstimateBuffer.setValue( 0.0 );
+#pragma omp parallel if( Devices::Host::isOMPEnabled() )
       {
-         RealType err = ( RealType ) ( tau / 3.0 *
-                              abs( 0.2 * _k1[ i ] +
-                                  -0.9 * _k3[ i ] +
-                                   0.8 * _k4[ i ] +
-                                  -0.1 * _k5[ i ] ) );
-         eps = max( eps, err );
+         RealType localEps( 0.0 );
+#pragma omp for
+         for( IndexType i = 0; i < size; i ++  )
+         {
+            RealType err = ( RealType ) ( tau / 3.0 *
+                                 abs( 0.2 * _k1[ i ] +
+                                     -0.9 * _k3[ i ] +
+                                      0.8 * _k4[ i ] +
+                                     -0.1 * _k5[ i ] ) );
+            localEps = max( localEps, err );
+         }
+         this->openMPErrorEstimateBuffer[ Devices::Host::getThreadIdx() ] = localEps;
       }
+      eps = this->openMPErrorEstimateBuffer.max();
    }
    if( std::is_same< DeviceType, Devices::Cuda >::value )
    {
-- 
GitLab