Skip to content
Snippets Groups Projects
Commit db3d7c20 authored by Vít Hanousek's avatar Vít Hanousek
Browse files

Merge branch 'develop' into MIC-devel

parents 5cbb4a1b b797dcb5
No related branches found
No related tags found
No related merge requests found
......@@ -64,14 +64,11 @@ class heatEquationSetter
typedef Device DeviceType;
typedef Index IndexType;
typedef Containers::StaticVector< MeshType::meshDimensions, Real > Vertex;
static bool run( const Config::ParameterContainer& parameters )
{
enum { Dimensions = MeshType::meshDimensions };
typedef Operators::LinearDiffusion< MeshType, Real, Index > ApproximateOperator;
typedef Functions::Analytic::Constant< Dimensions, Real > RightHandSide;
typedef Containers::StaticVector < MeshType::meshDimensions, Real > Vertex;
String boundaryConditionsType = parameters.getParameter< String >( "boundary-conditions-type" );
if( parameters.checkParameter( "boundary-conditions-constant" ) )
......
#!/bin/bash
device="host"
threadsNumbers="1 2 4 6"
dimensions="1D 2D 3D"
#dimensions="1D"
sizes1D="16 32 64 128 256 512"
......@@ -84,6 +85,7 @@ solve()
{
timeDiscretisation=$1
discreteSolver=$2
threadsNumber=$3
${solverName} --device ${device} \
--mesh mesh.tnl \
--initial-condition exact-u-00000.tnl \
......@@ -113,7 +115,10 @@ solve()
--sigma ${sigma} \
--time-dependence ${timeDependence} \
--snapshot-period ${snapshotPeriod} \
--final-time ${finalTime}
--final-time ${finalTime} \
--refresh-rate 50 \
--openmp-enabled true \
--openmp-max-threads ${threadsNumber}
}
computeError()
......@@ -128,77 +133,84 @@ computeError()
runTest()
{
for testFunction in ${testFunctions};
for threadsNumber in ${threadsNumbers};
do
mkdir -p ${testFunction}
cd ${testFunction}
setupTestFunction ${testFunction}
for dim in ${dimensions};
do
mkdir -p $dim
cd ${dim}
if test $dim = 1D;
then
sizes=$sizes1D
fi
if test $dim = 2D;
then
sizes=$sizes2D
fi
if test $dim = 3D;
then
sizes=$sizes3D
fi
lastSize=""
for size in $sizes;
do
mkdir -p $size
cd $size
echo ""
echo ""
echo ""
if test ! -f computation-done;
then
touch computation-in-progress
echo "========================================================================="
echo "=== SETTING UP THE GRID ==="
echo "========================================================================="
setupGrid $dim $size
echo "========================================================================="
echo "=== WRITING THE EXACT SOLUTION ==="
echo "========================================================================="
setInitialCondition $testFunction
echo "========================================================================="
echo "=== STARTING THE SOLVER ==="
echo "========================================================================="
#solve explicit merson
solve semi-implicit gmres
mv computation-in-progress computation-done
fi
echo "========================================================================="
echo "=== COMPUTING THE ERROR ==="
echo "========================================================================="
computeError
echo "========================================================================="
echo "=== COMPUTING THE EOC ==="
echo "========================================================================="
if test ! x$lastSize = x;
then
tnl-err2eoc ../$lastSize/errors.txt errors.txt
fi
echo "========================================================================="
echo "=== COMPUTATION DONE ==="
echo "========================================================================="
cd ..
lastSize=$size
done
mkdir -p threads-${threadsNumber}
cd threads-${threadsNumber}
cd ..
done
cd ..
done
for testFunction in ${testFunctions};
do
mkdir -p ${testFunction}
cd ${testFunction}
setupTestFunction ${testFunction}
for dim in ${dimensions};
do
mkdir -p $dim
cd ${dim}
if test $dim = 1D;
then
sizes=$sizes1D
fi
if test $dim = 2D;
then
sizes=$sizes2D
fi
if test $dim = 3D;
then
sizes=$sizes3D
fi
lastSize=""
for size in $sizes;
do
mkdir -p $size
cd $size
echo ""
echo ""
echo ""
if test ! -f computation-done;
then
touch computation-in-progress
echo "========================================================================="
echo "=== SETTING UP THE GRID ==="
echo "========================================================================="
setupGrid $dim $size
echo "========================================================================="
echo "=== WRITING THE EXACT SOLUTION ==="
echo "========================================================================="
setInitialCondition $testFunction
echo "========================================================================="
echo "=== STARTING THE SOLVER ==="
echo "========================================================================="
#solve explicit merson ${threadsNumber}
solve semi-implicit gmres ${threadsNumber}
mv computation-in-progress computation-done
fi
echo "========================================================================="
echo "=== COMPUTING THE ERROR ==="
echo "========================================================================="
computeError
echo "========================================================================="
echo "=== COMPUTING THE EOC ==="
echo "========================================================================="
if test ! x$lastSize = x;
then
tnl-err2eoc ../$lastSize/errors.txt errors.txt
fi
echo "========================================================================="
echo "=== COMPUTATION DONE ==="
echo "========================================================================="
cd ..
lastSize=$size
done
cd ..
done
cd ..
done
cd ..
done
}
runTest
......
......@@ -24,7 +24,8 @@
#ifndef NDEBUG
#ifdef HAVE_CUDA
// __CUDA_ARCH__ is defined by the compiler only for code executed on GPU
#ifdef __CUDA_ARCH__
#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command ) \
if( ! ( ___tnl__assert_condition ) ) \
{ \
......@@ -35,7 +36,7 @@
\
}
#else // HAVE_CUDA
#else // __CUDA_ARCH__
#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command ) \
if( ! ( ___tnl__assert_condition ) ) \
{ \
......@@ -47,7 +48,8 @@
/*___tnl__assert_command;*/ \
throw EXIT_FAILURE; \
}
#endif /* HAVE_CUDA */
#endif // __CUDA_ARCH__
#else /* #ifndef NDEBUG */
#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )
#endif /* #ifndef NDEBUG */
\ No newline at end of file
......@@ -76,7 +76,7 @@ class tnlParallelReductionMin
const RealType* data1,
const RealType* data2 )
{
return min( current, data1[ idx ] );
return TNL::min( current, data1[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return MaxValue< ResultType>(); };
......@@ -117,7 +117,7 @@ class tnlParallelReductionMax
const RealType* data1,
const RealType* data2 )
{
return max( current, data1[ idx ] );
return TNL::max( current, data1[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return MinValue< ResultType>(); };
......@@ -241,7 +241,7 @@ class tnlParallelReductionAbsSum : public tnlParallelReductionSum< Real, Index >
const RealType* data1,
const RealType* data2 )
{
return current + ::abs( data1[ idx ] );
return current + TNL::abs( data1[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -270,7 +270,7 @@ class tnlParallelReductionAbsMin : public tnlParallelReductionMin< Real, Index >
const RealType* data1,
const RealType* data2 )
{
return min( current, abs( data1[ idx ] ) );
return TNL::min( current, TNL::abs( data1[ idx ] ) );
};
__cuda_callable__ ResultType initialValue() { return MaxValue< ResultType>(); };
......@@ -299,7 +299,7 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index >
const RealType* data1,
const RealType* data2 )
{
return std::max( current, ::abs( data1[ idx ] ) );
return std::max( current, TNL::abs( data1[ idx ] ) );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -365,7 +365,7 @@ class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index >
const RealType* data1,
const RealType* data2 )
{
return current + ::pow( ::abs( data1[ idx ] ), p );
return current + TNL::pow( TNL::abs( data1[ idx ] ), p );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -514,7 +514,7 @@ class tnlParallelReductionDiffMin : public tnlParallelReductionMin< Real, Index
const RealType* data1,
const RealType* data2 )
{
return min( current, data1[ idx ] - data2[ idx ] );
return TNL::min( current, data1[ idx ] - data2[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return MaxValue< ResultType>(); };
......@@ -543,7 +543,7 @@ class tnlParallelReductionDiffMax : public tnlParallelReductionMax< Real, Index
const RealType* data1,
const RealType* data2 )
{
return max( current, data1[ idx ] - data2[ idx ] );
return TNL::max( current, data1[ idx ] - data2[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -572,7 +572,7 @@ class tnlParallelReductionDiffAbsSum : public tnlParallelReductionMax< Real, Ind
const RealType* data1,
const RealType* data2 )
{
return current + abs( data1[ idx ] - data2[ idx ] );
return current + TNL::abs( data1[ idx ] - data2[ idx ] );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -601,7 +601,7 @@ class tnlParallelReductionDiffAbsMin : public tnlParallelReductionMin< Real, Ind
const RealType* data1,
const RealType* data2 )
{
return min( current, abs( data1[ idx ] - data2[ idx ] ) );
return TNL::min( current, TNL::abs( data1[ idx ] - data2[ idx ] ) );
};
__cuda_callable__ ResultType initialValue() { return MaxValue< ResultType>(); };
......@@ -630,7 +630,7 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind
const RealType* data1,
const RealType* data2 )
{
return max( current, abs( data1[ idx ] - data2[ idx ] ) );
return TNL::max( current, TNL::abs( data1[ idx ] - data2[ idx ] ) );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......@@ -699,7 +699,7 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind
const RealType* data1,
const RealType* data2 )
{
return current + ::pow( abs( data1[ idx ] - data2[ idx ] ), p );
return current + TNL::pow( TNL::abs( data1[ idx ] - data2[ idx ] ), p );
};
__cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; };
......
......@@ -175,7 +175,7 @@ setSize( const Index size )
Algorithms::ArrayOperations< Device >::allocateMemory( this->allocationPointer, size );
this->data = this->allocationPointer;
this->size = size;
if( ! this->allocationPointer )
if( size > 0 && ! this->allocationPointer )
{
std::cerr << "I am not able to allocate new array with size "
<< ( double ) this->size * sizeof( ElementType ) / 1.0e9 << " GB." << std::endl;
......@@ -370,13 +370,14 @@ operator = ( const Array< Element, Device, Index >& array )
TNL_ASSERT( array. getSize() == this->getSize(),
std::cerr << "Source size: " << array. getSize() << std::endl
<< "Target size: " << this->getSize() << std::endl );
Algorithms::ArrayOperations< Device >::
template copyMemory< Element,
Element,
Index >
( this->getData(),
array. getData(),
array. getSize() );
if( this->getSize() > 0 )
Algorithms::ArrayOperations< Device >::
template copyMemory< Element,
Element,
Index >
( this->getData(),
array. getData(),
array. getSize() );
return ( *this );
};
......@@ -391,13 +392,14 @@ operator = ( const ArrayT& array )
TNL_ASSERT( array. getSize() == this->getSize(),
std::cerr << "Source size: " << array. getSize() << std::endl
<< "Target size: " << this->getSize() << std::endl );
Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
template copyMemory< Element,
typename ArrayT::ElementType,
typename ArrayT::IndexType >
( this->getData(),
array. getData(),
array. getSize() );
if( this->getSize() > 0 )
Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
template copyMemory< Element,
typename ArrayT::ElementType,
typename ArrayT::IndexType >
( this->getData(),
array. getData(),
array. getSize() );
return ( *this );
};
......@@ -411,6 +413,8 @@ operator == ( const ArrayT& array ) const
{
if( array. getSize() != this->getSize() )
return false;
if( this->getSize() == 0 )
return true;
return Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
template compareMemory< typename ArrayT::ElementType,
Element,
......
......@@ -310,6 +310,7 @@ void SharedArray< Element, Device, Index > :: setValue( const Element& e )
template< typename Element,
typename Device,
typename Index >
__cuda_callable__
const Element* SharedArray< Element, Device, Index > :: getData() const
{
return this->data;
......@@ -318,6 +319,7 @@ const Element* SharedArray< Element, Device, Index > :: getData() const
template< typename Element,
typename Device,
typename Index >
__cuda_callable__
Element* SharedArray< Element, Device, Index > :: getData()
{
return this->data;
......
......@@ -68,6 +68,7 @@ String StaticArray< 3, Element >::getType()
}
template< typename Element >
__cuda_callable__
inline int StaticArray< 3, Element >::getSize() const
{
return size;
......
......@@ -182,6 +182,7 @@ StaticVector< Size, Real >::abs() const
template< int Size, typename Real, typename Scalar >
__cuda_callable__
StaticVector< Size, Real > operator * ( const Scalar& c, const StaticVector< Size, Real >& u )
{
return u * c;
......
......@@ -25,7 +25,7 @@
namespace TNL {
namespace Solvers {
class tnlDefaultBuildConfigTag{};
class DefaultBuildConfigTag{};
/****
* All devices are enabled by default. Those which are not available
......
......@@ -17,7 +17,7 @@ namespace Solvers {
template< template< typename Real, typename Device, typename Index, typename MeshType, typename ConfigTag, typename SolverStarter > class ProblemSetter,
template< typename ConfTag > class ProblemConfig,
typename ConfigTag = tnlDefaultBuildConfigTag >
typename ConfigTag = DefaultBuildConfigTag >
class Solver
{
public:
......
......@@ -122,7 +122,7 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitSORSolverTag >::enabled )
config.addEntryEnum( "sor" );
#ifdef HAVE_UMFPACK
if( MeshConfigSemiImplicitSolver< MeshConfig, SemiImplicitUmfpackSolverTag >::enabled )
if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitUmfpackSolverTag >::enabled )
config.addEntryEnum( "umfpack" );
#endif
}
......
0% — Loading, or try reloading the page.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment