diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e855035df443b4ee53dc64014fe5d45051777b5..41a8ed529a8444613c42caa843da5e3998a9e5ca 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,7 @@
 # Vladimir Klement
 # Jakub Klinkovsky
 
-cmake_minimum_required( VERSION 3.0 )
+cmake_minimum_required( VERSION 3.4 )
 
 project( tnl )
 
@@ -33,7 +33,6 @@ if( CMAKE_BUILD_TYPE STREQUAL "Debug")
     set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/lib )
     set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/bin )
     set( debugExt -dbg )
-    set( CMAKE_CXX_FLAGS "${CXXFLAGS} -g ")
 else()
     set( PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/Release/src )
     set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/tests )
@@ -41,10 +40,12 @@ else()
     set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/lib)
     set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin)
 endif()
+
 # set Debug/Release options
 set( CMAKE_CXX_FLAGS "-std=c++11" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g" )
-set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -g" )
+set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG" )
+#set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" )
 # pass -rdynamic only in Debug mode
 set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" )
 set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" )
@@ -65,8 +66,8 @@ if( WITH_CUDA STREQUAL "yes" )
         set( BUILD_CUDA TRUE)
         set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
         set(BUILD_SHARED_LIBS ON)
-        set(CUDA_SEPARABLE_COMPILATION ON)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA )
+        set(CUDA_SEPARABLE_COMPILATION ON)        
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ;-DHAVE_CUDA  )
         #AddCompilerFlag( "-DHAVE_NOT_CXX11" ) # -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 " )
         set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20
                             -gencode arch=compute_30,code=sm_30
@@ -110,6 +111,9 @@ if( WITH_CUDA STREQUAL "yes" )
         set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} )
         # TODO: this is necessary only due to a bug in cmake
         set( CUDA_ADD_LIBRARY_OPTIONS -shared )
+        # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1)
+        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -ccbin "${CMAKE_CXX_COMPILER}" )
+        set( CUDA_HOST_COMPILER "" )
 
         ####
         # Check for cuBLAS
@@ -152,7 +156,7 @@ if( WITH_CUDA STREQUAL "yes" )
                cuda_include_directories( ${CUSPARSE_INCLUDE_DIR} )
                set( CUSPARSE_LIBRARY "${CUDA_cusparse_LIBRARY}" )
            endif()            
-        endif( NOT WITH_CUSPARSE STREQUAL "no" )
+        endif( NOT WITH_CUSPARSE STREQUAL "no" )        
     endif( CUDA_FOUND )
 endif( WITH_CUDA STREQUAL "yes" )
 
diff --git a/TODO b/TODO
index 7619d268836f5bf97700a437fa452c3f83055ff9..3f427388a1ec4b4ccb2818f3532ca98bb16b0708 100644
--- a/TODO
+++ b/TODO
@@ -10,6 +10,11 @@ TODO:
  - data by se na hostu preskupila do souvisleho bloku dat a ten se prenesl najednou
 
 
+TODO:
+ - zrejme bude potreba udrzovat ke kazdemu objektu jeho obraz na GPU/MIC
+ - to by zarizovala metoda syncToDevice() napr. kazdy objekt by mel promennou modified, ktera by rikala, jestli se zmenil a zda je nutne ho
+   prekopirovavat
+
 TODO:
  - zavest namespaces
 
@@ -21,11 +26,6 @@ TODO: CUDA unified memory
 se s nimi pracovat postaru
  - bylo by dobre to obalit unique poinetry, aby se nemusela delat dealokace rucne
 
-TODO: shared pointery
- - mohli bysme pomoci nich odstranit Shared objekty
- - asi by bylo lepsi datcounter z shared pointeru primo do array a tento counter by se alokoval az po porvnim sdileni dat
- - diky tomu by se array mohlo vytvaret i na gpu bez nutnosti dynamicke alokace, jen by nebylo mozne delat bind (nebo nejaky zjednoduseny)
-
 TODO: Mesh
  * vsechny traits zkusit presunout do jednotneho MeshTraits, tj. temer MeshConfigTraits ale pojmenovat jako MeshTraits
  * omezit tnlDimesnionsTag - asi to ale nepujde
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f98373d2327cf5fa20e561c789d988a314c2de59..ba9da977671d770fc9d6fa0eaf265b01400a0549 100755
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -22,13 +22,13 @@ set (headers tnlAssert.h
              tnlList.h
              tnlList_impl.h
              tnlLogger.h
-             tnlOmp.h
              tnlObject.h 
              tnlStack.h
              tnlStaticFor.h
              tnlStatistics.h 
              tnlString.h 
              tnlReal.h
+             tnlTimer.h
              tnlTimerCPU.h  
              tnlTimerRT.h    
              mfilename.h 
@@ -48,12 +48,12 @@ set( common_SOURCES
      ${CURRENT_DIR}/tnlObject.cpp
      ${CURRENT_DIR}/tnlStatistics.cpp
      ${CURRENT_DIR}/tnlString.cpp 
+     ${CURRENT_DIR}/tnlTimer.cpp 
      ${CURRENT_DIR}/tnlTimerCPU.cpp      
      ${CURRENT_DIR}/mfilename.cpp 
      ${CURRENT_DIR}/mpi-supp.cpp 
      ${CURRENT_DIR}/tnlCuda.cpp
-     ${CURRENT_DIR}/tnlHost.cpp
-     ${CURRENT_DIR}/tnlOmp.cpp )
+     ${CURRENT_DIR}/tnlHost.cpp )
 
 IF( BUILD_CUDA )
    set( tnl_core_CUDA__SOURCES
diff --git a/src/core/terminal-colors.h b/src/core/terminal-colors.h
index 2c67d00c51a06e57f0300f8a0b9d7a73cb61eda9..9a1d123f21a612a19c4ce00765e6bd13db6712a0 100644
--- a/src/core/terminal-colors.h
+++ b/src/core/terminal-colors.h
@@ -18,12 +18,13 @@
 #ifndef TERMINAL_COLORS_H
 #define	TERMINAL_COLORS_H
 
-const std::string red( "\033[0;31m" );
-const std::string green( "\033[1;32m" );
-const std::string yellow( "\033[1;33m" );
-const std::string cyan( "\033[0;36m" );
-const std::string magenta( "\033[0;35m" );
-const std::string reset( "\033[0m" );
+const tnlString red( "\033[0;31m" );
+const tnlString green( "\033[1;32m" );
+const tnlString yellow( "\033[1;33m" );
+const tnlString cyan( "\033[0;36m" );
+const tnlString magenta( "\033[0;35m" );
+const tnlString bold(); 
+const tnlString reset( "\033[0m" );
 
 
 #endif	/* TERMINAL_COLORS_H */
diff --git a/src/core/tnlCuda.cu b/src/core/tnlCuda.cu
index 4f38ceb9dcaccfe236e890778f5ec9b4ea09a24d..113cba35227a0c461f1a32dc1148cd1e9646ac0f 100644
--- a/src/core/tnlCuda.cu
+++ b/src/core/tnlCuda.cu
@@ -16,27 +16,30 @@
  ***************************************************************************/
 
 #include <core/tnlCuda.h>
+#include <config/tnlConfigDescription.h>
+#include <config/tnlParameterContainer.h>
 
-void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix )
+
+/*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix )
 {
 #ifdef HAVE_CUDA
-   //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true );
-   //config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
+   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 );
 #else
-   //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false );
-   //config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
-#endif
-   
+   config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 );   
+#endif   
 }
       
 bool tnlCuda::setup( const tnlParameterContainer& parameters,
                     const tnlString& prefix )
 {
-   //enable = parameters.getParameter< bool >( prefix + "omp-enabled" );
-   //maxThreadsCount = parameters.getParameter< int ( prefix + "omp-max-threads" );
+   int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
+#ifdef HAVE_CUDA
+    cudaSetDevice( cudaDevice );
+    checkCudaDevice;
+#endif
    return true;
 }
-
+*/
 
 bool tnlCuda::checkDevice( const char* file_name, int line )
 {
diff --git a/src/core/tnlHost.cpp b/src/core/tnlHost.cpp
index 7d53f0435cdc88f707af57ad46b0ee45b6b3d8af..639ea76eecbe38758270d950ac8dfdcba5eb9503 100644
--- a/src/core/tnlHost.cpp
+++ b/src/core/tnlHost.cpp
@@ -19,6 +19,15 @@
 #define TNLHOSTL_H_
 
 #include <core/tnlHost.h>
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#endif
+#include <config/tnlConfigDescription.h>
+#include <config/tnlParameterContainer.h>
+
+
+bool tnlHost::ompEnabled( true );
+int tnlHost::maxThreadsCount( -1 );
 
 tnlString tnlHost::getDeviceType()
 {
@@ -32,4 +41,63 @@ size_t tnlHost::getFreeMemory()
    return pages * page_size;
 };
 
+void tnlHost::enableOMP()
+{
+   ompEnabled = true;
+}
+
+void tnlHost::disableOMP()
+{
+   ompEnabled = false;
+}
+
+void tnlHost::setMaxThreadsCount( int maxThreadsCount_ )
+{
+   maxThreadsCount = maxThreadsCount_;
+#ifdef HAVE_OPENMP   
+   omp_set_num_threads( maxThreadsCount );
+#endif   
+}
+
+int tnlHost::getMaxThreadsCount()
+{
+#ifdef HAVE_OPENMP
+   if( maxThreadsCount == -1 )
+      return omp_get_max_threads();
+   return maxThreadsCount;
+#else
+   return 0;
+#endif
+}
+      
+int tnlHost::getThreadIdx()
+{
+#ifdef HAVE_OPENMP
+   return omp_get_thread_num();
+#else
+   return 0;
+#endif  
+}
+
+void tnlHost::configSetup( tnlConfigDescription& config, const tnlString& prefix )
+{
+#ifdef HAVE_OPENMP
+   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true );
+   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
+#else
+   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false );
+   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
+#endif
+   
+}
+      
+bool tnlHost::setup( const tnlParameterContainer& parameters,
+                    const tnlString& prefix )
+{
+   ompEnabled = parameters.getParameter< bool >( prefix + "omp-enabled" );
+   maxThreadsCount = parameters.getParameter< int >( prefix + "omp-max-threads" );
+   return true;
+}
+
+
 #endif /* TNLHOST_H_ */
diff --git a/src/core/tnlHost.h b/src/core/tnlHost.h
index 6f921492dfb24f0b5c2f8c5106eacf2ff56252c2..23826c968e7d81b4d4bd0f0b0283ec4546dda886 100644
--- a/src/core/tnlHost.h
+++ b/src/core/tnlHost.h
@@ -22,20 +22,48 @@
 #include <core/tnlDevice.h>
 #include <core/tnlString.h>
 
+class tnlConfigDescription;
+class tnlParameterContainer;
+
 class tnlHost
 {
    public:
 
-   enum { DeviceType = tnlHostDevice };
+      enum { DeviceType = tnlHostDevice };
+
+      static tnlString getDeviceType();
+
+   #ifdef HAVE_CUDA
+      __host__ __device__
+   #endif
+      static inline tnlDeviceEnum getDevice() { return tnlHostDevice; };
+
+      static size_t getFreeMemory();
+   
+      static void disableOMP();
+      
+      static void enableOMP();
+      
+      static inline bool isOMPEnabled() { return ompEnabled; };
+      
+      static void setMaxThreadsCount( int maxThreadsCount );
+      
+      static int getMaxThreadsCount();
+      
+      static int getThreadIdx();
+      
+      static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" );
+      
+      static bool setup( const tnlParameterContainer& parameters,
+                         const tnlString& prefix = "" );
 
-   static tnlString getDeviceType();
+   protected:
+      
+      static bool ompEnabled;
+      
+      static int maxThreadsCount;
 
-#ifdef HAVE_CUDA
-   __host__ __device__
-#endif
-   static inline tnlDeviceEnum getDevice() { return tnlHostDevice; };
 
-   static size_t getFreeMemory();
 };
 
 #endif /* TNLHOST_H_ */
diff --git a/src/core/tnlOmp.cpp b/src/core/tnlOmp.cpp
deleted file mode 100644
index 841d9fc57b1681783fe53c097d53fd50d787200c..0000000000000000000000000000000000000000
--- a/src/core/tnlOmp.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/***************************************************************************
-                          tnlOmp.cpp  -  description
-                             -------------------
-    begin                : Mar 4, 2016
-    copyright            : (C) 2016 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include <core/tnlOmp.h>
-#ifdef HAVE_OPENMP
-#include <omp.h>
-#endif
-
-bool tnlOmp::enabled( true );
-int tnlOmp::maxThreadsCount( -1 );
-
-void tnlOmp::enable()
-{
-   enabled = true;
-}
-
-void tnlOmp::disable()
-{
-   enabled = false;
-}
-
-void tnlOmp::setMaxThreadsCount( int maxThreadsCount_ )
-{
-   maxThreadsCount = maxThreadsCount_;
-}
-
-int tnlOmp::getMaxThreadsCount()
-{
-#ifdef HAVE_OPENMP
-   if( maxThreadsCount == -1 )
-      return omp_get_max_threads();
-   return maxThreadsCount;
-#else
-   return 0;
-#endif
-}
-      
-int tnlOmp::getThreadIdx()
-{
-#ifdef HAVE_OPENMP
-   return omp_get_thread_num();
-#else
-   return 0;
-#endif  
-}
-
-void tnlOmp::configSetup( tnlConfigDescription& config, const tnlString& prefix )
-{
-#ifdef HAVE_OPENMP
-   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true );
-   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
-#else
-   config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false );
-   config.addEntry<  int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
-#endif
-   
-}
-      
-bool tnlOmp::setup( const tnlParameterContainer& parameters,
-                    const tnlString& prefix )
-{
-   enabled = parameters.getParameter< bool >( prefix + "omp-enabled" );
-   maxThreadsCount = parameters.getParameter< int >( prefix + "omp-max-threads" );
-   return true;
-}
-
diff --git a/src/core/tnlTimer.cpp b/src/core/tnlTimer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..30e43f41dfb9faa83ebee6dc6b06714c589ef5f8
--- /dev/null
+++ b/src/core/tnlTimer.cpp
@@ -0,0 +1,152 @@
+/***************************************************************************
+                          tnlTimer.cpp  -  description
+                             -------------------
+    begin                : Mar 14, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <core/tnlTimer.h>
+
+#include "tnlConfig.h"
+#ifdef HAVE_SYS_RESOURCE_H
+   #include <sys/resource.h>
+#endif
+#ifdef HAVE_SYS_TIME_H
+   #include <stddef.h>
+   #include <sys/time.h>
+   #define HAVE_TIME
+#endif
+
+
+tnlTimer defaultTimer;
+
+tnlTimer::tnlTimer()
+{
+   reset();
+}
+
+void tnlTimer::reset()
+{
+   this->initialCPUTime = 0;
+   this->totalCPUTime = 0.0;
+   this->initialRealTime = 0;
+   this->totalRealTime = 0.0;
+   this->initialCPUCycles = 0;
+   this->totalCPUCycles = 0;
+   this->stopState = true;
+}
+
+void tnlTimer::stop()
+{
+
+   if( ! this->stopState )
+   {
+      /****
+       * Real time
+       */
+#ifdef HAVE_TIME      
+      struct timeval tp;
+      int rtn = gettimeofday( &tp, NULL );
+      this->totalRealTime += ( double ) tp. tv_sec + 1.0e-6 * ( double ) tp. tv_usec - this->initialRealTime;
+#endif
+      
+      /****
+       * CPU time
+       */
+#ifdef HAVE_SYS_RESOURCE_H      
+      rusage initUsage;
+      getrusage(  RUSAGE_SELF, &initUsage );
+      this->totalCPUTime += initUsage. ru_utime. tv_sec + 1.0e-6 * ( double ) initUsage. ru_utime. tv_usec - this->initialCPUTime;
+#endif      
+      
+      /****
+       * CPU cycles
+       */
+      this->totalCPUCycles += this->rdtsc() - this->initialCPUCycles;
+      this->stopState = true;
+   }
+}
+
+void tnlTimer::start()
+{
+   /****
+    * Real time
+    */
+#ifdef HAVE_TIME
+   struct timeval tp;
+   int rtn = gettimeofday( &tp, NULL );
+   this->initialRealTime = ( double ) tp. tv_sec + 1.0e-6 * ( double ) tp. tv_usec;
+#endif
+
+   /****
+    * CPU Time
+    */
+#ifdef HAVE_SYS_RESOURCE_H
+   rusage initUsage;
+   getrusage( RUSAGE_SELF, &initUsage );
+   this->initialCPUTime = initUsage. ru_utime. tv_sec + 1.0e-6 * ( double ) initUsage. ru_utime. tv_usec;
+#endif
+   
+   /****
+    * CPU cycles
+    */
+   this->initialCPUCycles = this->rdtsc();
+   
+   this->stopState = false;
+}
+
+double tnlTimer::getRealTime()
+{
+#ifdef HAVE_TIME
+   if( ! this->stopState )
+   {
+      this->stop();
+      this->start();
+   }
+   return this->totalRealTime;
+#else
+   return -1;
+#endif
+}
+
+double tnlTimer::getCPUTime()
+{
+#ifdef HAVE_SYS_RESOURCE_H
+   if( ! this->stopState )
+   {
+      this->stop();
+      this->start();
+   }
+   return this->totalCPUTime;
+#else
+   return -1;
+#endif
+}
+
+unsigned long long int tnlTimer::getCPUCycles()
+{
+   if( ! this->stopState )
+   {
+      this->stop();
+      this->start();
+   }
+   return this->totalCPUCycles;
+}
+
+bool tnlTimer::writeLog( tnlLogger& logger, int logLevel )
+{
+   logger.writeParameter< double                 >( "Real time:",  this->getRealTime(),  logLevel );
+   logger.writeParameter< double                 >( "CPU time:",   this->getCPUTime(),   logLevel );
+   logger.writeParameter< unsigned long long int >( "CPU Cycles:", this->getCPUCycles(), logLevel );
+
+}
diff --git a/src/core/tnlOmp.h b/src/core/tnlTimer.h
similarity index 52%
rename from src/core/tnlOmp.h
rename to src/core/tnlTimer.h
index 2308964a13d3f7eef8b3c6ae797f5e9d3b4aaddc..b72001da772626e35f7371ab9f97ba3db1f7a296 100644
--- a/src/core/tnlOmp.h
+++ b/src/core/tnlTimer.h
@@ -1,7 +1,7 @@
 /***************************************************************************
-                          tnlOmp.h  -  description
+                          tnlTimer.h  -  description
                              -------------------
-    begin                : Mar 4, 2016
+    begin                : Mar 14, 2016
     copyright            : (C) 2016 by Tomas Oberhuber
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
@@ -16,40 +16,49 @@
  ***************************************************************************/
 
 
-#ifndef TNLOMP_H
-#define	TNLOMP_H
+#ifndef TNLTIMER_H
+#define	TNLTIMER_H
 
-#include <config/tnlConfigDescription.h>
-#include <config/tnlParameterContainer.h>
+#include <core/tnlLogger.h>
 
-class tnlOmp
+class tnlTimer
 {
-   public:      
-      
-      static void disable();
-      
-      static void enable();
-      
-      static inline bool isEnabled() { return enabled; };
-      
-      static void setMaxThreadsCount( int maxThreadsCount );
-      
-      static int getMaxThreadsCount();
-      
-      static int getThreadIdx();
-      
-      static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" );
+   public:
+   
+      tnlTimer();
+
+      void reset();
+
+      void stop();
+
+      void start();
+
+      double getRealTime();
+
+      double getCPUTime();
+
+      unsigned long long int getCPUCycles();
       
-      static bool setup( const tnlParameterContainer& parameters,
-                         const tnlString& prefix = "" );
-            
+      bool writeLog( tnlLogger& logger, int logLevel = 0 );
+         
    protected:
-      
-      static bool enabled;
-      
-      static int maxThreadsCount;
+
+   double initialRealTime, totalRealTime,
+          initialCPUTime, totalCPUTime;
+   
+   unsigned long long int initialCPUCycles, totalCPUCycles;
+   
+   bool stopState;
+   
+   inline unsigned long long rdtsc()
+   {
+     unsigned hi, lo;
+     __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+     return ( ( unsigned long long ) lo ) | ( ( ( unsigned long long ) hi ) << 32 );
+   }
 };
 
+extern tnlTimer defaultTimer;
 
-#endif	/* TNLOMP_H */
+#endif	/* TNLTIMER_H */
 
diff --git a/src/core/vectors/tnlVectorOperationsHost_impl.h b/src/core/vectors/tnlVectorOperationsHost_impl.h
index cd7a4b48101f0d9c32dae37a61d5eea25ac49a7a..1ea7fd98a1b17d5a237078bc9079d8da33d2cdd8 100644
--- a/src/core/vectors/tnlVectorOperationsHost_impl.h
+++ b/src/core/vectors/tnlVectorOperationsHost_impl.h
@@ -18,8 +18,6 @@
 #ifndef TNLVECTOROPERATIONSHOST_IMPL_H_
 #define TNLVECTOROPERATIONSHOST_IMPL_H_
 
-#include <core/tnlOmp.h>
-
 static const int OpenMPVectorOperationsThreshold = 65536; // TODO: check this threshold
 
 template< typename Vector >
@@ -104,7 +102,7 @@ getVectorL1Norm( const Vector& v )
    Real result( 0.0 );
    const Index n = v. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
    for( Index i = 0; i < n; i ++ )
       result += fabs( v[ i ] );
@@ -122,7 +120,7 @@ getVectorL2Norm( const Vector& v )
    Real result( 0.0 );
    const Index n = v. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
    for( Index i = 0; i < n; i ++ )
    {
@@ -151,7 +149,7 @@ getVectorLpNorm( const Vector& v,
    Real result( 0.0 );
    const Index n = v. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
    for( Index i = 0; i < n; i ++ )
       result += pow( fabs( v[ i ] ), p );
@@ -168,7 +166,7 @@ typename Vector :: RealType tnlVectorOperations< tnlHost > :: getVectorSum( cons
    Real result( 0.0 );
    const Index n = v. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif        
    for( Index i = 0; i < n; i ++ )
       result += v[ i ];
@@ -256,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    Real result( 0.0 );
    const Index n = v1. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif        
    for( Index i = 0; i < n; i ++ )
       result += fabs( v1[ i ] - v2[ i ] );
@@ -278,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1,
    Real result( 0.0 );
    const Index n = v1. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif        
    for( Index i = 0; i < n; i ++ )
    {
@@ -312,7 +310,7 @@ getVectorDifferenceLpNorm( const Vector1& v1,
    Real result( 0.0 );
    const Index n = v1. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif        
    for( Index i = 0; i < n; i ++ )
       result += pow( fabs( v1. getElement( i ) - v2. getElement( i ) ), p );
@@ -332,7 +330,7 @@ typename Vector1::RealType tnlVectorOperations< tnlHost > :: getVectorDifference
    Real result( 0.0 );
    const Index n = v1. getSize();
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif        
    for( Index i = 0; i < n; i ++ )
       result += v1. getElement( i ) - v2. getElement( i );
@@ -371,7 +369,7 @@ typename Vector1 :: RealType tnlVectorOperations< tnlHost > :: getScalarProduct(
    Real result( 0.0 );
    const Index n = v1. getSize();
 #ifdef HAVE_OPENMP
-  #pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+  #pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif     
    for( Index i = 0; i < n; i++ )
       result += v1[ i ] * v2[ i ];
@@ -411,13 +409,13 @@ void tnlVectorOperations< tnlHost > :: addVector( Vector1& y,
    const Index n = y. getSize();
    if( thisMultiplicator == 1.0 )
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
       for( Index i = 0; i < n; i ++ )
          y[ i ] += alpha * x[ i ];
    else
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
       for( Index i = 0; i < n; i ++ )
          y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ];
@@ -445,13 +443,13 @@ addVectors( Vector1& v,
    const Index n = v.getSize();
    if( thisMultiplicator == 1.0 )
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
       for( Index i = 0; i < n; i ++ )
          v[ i ] += multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
    else
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
 #endif           
       for( Index i = 0; i < n; i ++ )
          v[ i ] = thisMultiplicator * v[ i ] * multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ];
diff --git a/src/functions/tnlMeshFunction.h b/src/functions/tnlMeshFunction.h
index add52cc1c939d2b202206e15a949f1879285d050..e86aca9d3260e1cb33e9827b7b3cafd87a232c73 100644
--- a/src/functions/tnlMeshFunction.h
+++ b/src/functions/tnlMeshFunction.h
@@ -66,6 +66,8 @@ class tnlMeshFunction :
       bool setup( const tnlParameterContainer& parameters,
                   const tnlString& prefix = "" );      
       
+      void bind( ThisType& meshFunction );
+      
       template< typename Vector >
       void bind( const MeshType& mesh,
                  const Vector& data,
diff --git a/src/functions/tnlMeshFunction_impl.h b/src/functions/tnlMeshFunction_impl.h
index 2b08d2f2927c25def185d48b1084a40299379331..5fe48a66259e5a5c6e702a918482fe95c0af1248 100644
--- a/src/functions/tnlMeshFunction_impl.h
+++ b/src/functions/tnlMeshFunction_impl.h
@@ -133,6 +133,17 @@ setup( const tnlParameterContainer& parameters,
    return true;
 }
 
+template< typename Mesh,
+          int MeshEntityDimensions,
+          typename Real >
+void
+tnlMeshFunction< Mesh, MeshEntityDimensions, Real >::
+bind( tnlMeshFunction< Mesh, MeshEntityDimensions, Real >& meshFunction )
+{
+   this->mesh = meshFunction.getMesh();
+   this->data.bind( meshFunction.getData() );
+}
+
 template< typename Mesh,
           int MeshEntityDimensions,
           typename Real >
diff --git a/src/functions/tnlTestFunction_impl.h b/src/functions/tnlTestFunction_impl.h
index 46f70b3e4fa6bf5b4717702c176b201dc7c49e8b..32271a80d9f85abbb016ccdea29db394e447f293 100644
--- a/src/functions/tnlTestFunction_impl.h
+++ b/src/functions/tnlTestFunction_impl.h
@@ -291,31 +291,31 @@ getPartialDerivative( const VertexType& vertex,
    {
       case constant:
          return scale * ( ( tnlConstantFunction< Dimensions, Real >* ) function )->
-                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                   template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case expBump:
          return scale * ( ( tnlExpBumpFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinBumps:
          return scale * ( ( tnlSinBumpsFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinWave:
          return scale * ( ( tnlSinWaveFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case cylinder:
          return scale * ( ( tnlCylinderFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case flowerpot:
          return scale * ( ( tnlFlowerpotFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case twins:
          return scale * ( ( tnlTwinsFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case pseudoSquare:
          return scale * ( ( tnlPseudoSquareFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case blob:
          return scale * ( ( tnlBlobFunction< Dimensions, Real >* ) function )->
-                  getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+                  template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       default:
          return 0.0;
    }
@@ -476,17 +476,17 @@ tnlTestFunction< FunctionDimensions, Real, Device >::
 printFunction( ostream& str ) const
 {
    FunctionType* f = ( FunctionType* ) this->function;
-   switch( Device::DeviceType )
+   if( std::is_same< Device, tnlHost >::value )
    {
-      case tnlHostDevice:
-         str << *f;
-         return str;
-      case tnlCudaDevice:
-         tnlCuda::print( f, str );
-         return str;
-      default:
-         return str;
+      str << *f;
+      return str;
    }
+   if( std::is_same< Device, tnlCuda >::value )
+   {
+      tnlCuda::print( f, str );
+      return str;
+   }
+   return str;
 }
 
 template< int FunctionDimensions,
diff --git a/src/matrices/tnlCSRMatrix_impl.h b/src/matrices/tnlCSRMatrix_impl.h
index e441b04685d50859a1e0c750ba378a25d65e4949..6622a0e0d55e8a04e38a971fa5834d06221e2f1d 100644
--- a/src/matrices/tnlCSRMatrix_impl.h
+++ b/src/matrices/tnlCSRMatrix_impl.h
@@ -726,7 +726,7 @@ class tnlCSRMatrixDeviceDependentCode< tnlHost >
          const InVector* inVectorPtr = &inVector;
          OutVector* outVectorPtr = &outVector;
 #ifdef HAVE_OPENMP
-#pragma omp parallel for private( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( tnlOmp::isEnabled() )
+#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( tnlHost::isOMPEnabled() )
 #endif         
          for( Index row = 0; row < rows; row ++ )
             ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr );
diff --git a/src/matrices/tnlDenseMatrix_impl.h b/src/matrices/tnlDenseMatrix_impl.h
index d20c9512e0384921a1cbf3be1a8538e9115f1546..4e78c369d01cda60d87cf2dbb3cd2d21789ba7d4 100644
--- a/src/matrices/tnlDenseMatrix_impl.h
+++ b/src/matrices/tnlDenseMatrix_impl.h
@@ -932,7 +932,7 @@ class tnlDenseMatrixDeviceDependentCode< tnlHost >
                                  OutVector& outVector )
       {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() )
+#pragma omp parallel for if( tnlHost::isOMPEnabled() )
 #endif           
          for( Index row = 0; row < matrix.getRows(); row ++ )
             outVector[ row ] = matrix.rowVectorProduct( row, inVector );
diff --git a/src/matrices/tnlEllpackMatrix_impl.h b/src/matrices/tnlEllpackMatrix_impl.h
index b56bdfaf38c627fd81672eebf7dfbdbbe7ff8af7..6061ac0f22ead22825ae46ea4bc72524779860bd 100644
--- a/src/matrices/tnlEllpackMatrix_impl.h
+++ b/src/matrices/tnlEllpackMatrix_impl.h
@@ -688,7 +688,7 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost >
                                  OutVector& outVector )
       {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() )
+#pragma omp parallel for if( tnlHost::isOMPEnabled() )
 #endif           
          for( Index row = 0; row < matrix.getRows(); row ++ )
             outVector[ row ] = matrix.rowVectorProduct( row, inVector );
diff --git a/src/matrices/tnlMultidiagonalMatrix_impl.h b/src/matrices/tnlMultidiagonalMatrix_impl.h
index ae89ffa4335c444cd86c2ba52d50c4c272f68d8a..2c8a92d0a7b1b3f2db246d7817f0a490b1e6f7cd 100644
--- a/src/matrices/tnlMultidiagonalMatrix_impl.h
+++ b/src/matrices/tnlMultidiagonalMatrix_impl.h
@@ -735,7 +735,7 @@ class tnlMultidiagonalMatrixDeviceDependentCode< tnlHost >
                                  OutVector& outVector )
       {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() )
+#pragma omp parallel for if( tnlHost::isOMPEnabled() )
 #endif           
          for( Index row = 0; row < matrix.getRows(); row ++ )
             outVector[ row ] = matrix.rowVectorProduct( row, inVector );
diff --git a/src/matrices/tnlSlicedEllpackMatrix_impl.h b/src/matrices/tnlSlicedEllpackMatrix_impl.h
index dd0c6ed78e3902c5ff351965b0c7e4bac18f4049..b047ec08b3891c8ed6d6427cec95f8243424efbc 100644
--- a/src/matrices/tnlSlicedEllpackMatrix_impl.h
+++ b/src/matrices/tnlSlicedEllpackMatrix_impl.h
@@ -756,7 +756,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost >
                                  OutVector& outVector )
       {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() )
+#pragma omp parallel for if( tnlHost::isOMPEnabled() )
 #endif           
          for( Index row = 0; row < matrix.getRows(); row ++ )
             outVector[ row ] = matrix.rowVectorProduct( row, inVector );
diff --git a/src/matrices/tnlTridiagonalMatrix_impl.h b/src/matrices/tnlTridiagonalMatrix_impl.h
index 9857c0b172bce4c635c7ef88269464316fb1a161..3cb917dc81849e2ce12fa4e40a3c33945d2a332a 100644
--- a/src/matrices/tnlTridiagonalMatrix_impl.h
+++ b/src/matrices/tnlTridiagonalMatrix_impl.h
@@ -654,7 +654,7 @@ class tnlTridiagonalMatrixDeviceDependentCode< tnlHost >
                                  OutVector& outVector )
       {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for if( tnlOmp::isEnabled() )
+#pragma omp parallel for if( tnlHost::isOMPEnabled() )
 #endif           
          for( Index row = 0; row < matrix.getRows(); row ++ )
             outVector[ row ] = matrix.rowVectorProduct( row, inVector );
diff --git a/src/mesh/grids/tnlGridTraverser.h b/src/mesh/grids/tnlGridTraverser.h
index 8b05a88e4eeec5212c3b8a67f33c92f4372a0ca4..8427932dfd340d80ac60187e72522222531baf90 100644
--- a/src/mesh/grids/tnlGridTraverser.h
+++ b/src/mesh/grids/tnlGridTraverser.h
@@ -112,8 +112,8 @@ class tnlGridTraverser< tnlGrid< 2, Real, tnlHost, Index > >
       static void
       processEntities(
          const GridType& grid,
-         const CoordinatesType& begin,
-         const CoordinatesType& end,
+         const CoordinatesType begin,
+         const CoordinatesType end,
          const CoordinatesType& entityOrientation,
          const CoordinatesType& entityBasis,         
          UserData& userData );
diff --git a/src/mesh/grids/tnlGridTraverser_impl.h b/src/mesh/grids/tnlGridTraverser_impl.h
index 93a6fdac22bae92f285b42c94dd559ead740eb25..8fd55207570d47efd120e128a7ef1d1dbea51044 100644
--- a/src/mesh/grids/tnlGridTraverser_impl.h
+++ b/src/mesh/grids/tnlGridTraverser_impl.h
@@ -38,10 +38,11 @@ processEntities(
    const CoordinatesType& entityBasis,   
    UserData& userData )
 {
+
+   
    GridEntity entity( grid );
    entity.setOrientation( entityOrientation );
    entity.setBasis( entityBasis );
-   
    if( processOnlyBoundaryEntities )
    {
       entity.getCoordinates() = begin;
@@ -71,8 +72,7 @@ template< typename Real,
           typename Index,
           typename GridEntity,
           typename UserData,
-          typename EntitiesProcessor,
-          bool processOnlyBoundaryEntities >
+          typename EntitiesProcessor >
 __global__ void 
 tnlGridTraverser1D(
    const tnlGrid< 1, Real, tnlCuda, Index >* grid,
@@ -94,13 +94,49 @@ tnlGridTraverser1D(
    
    if( coordinates.x() <= end->x() )
    {
-      if( ! processOnlyBoundaryEntities || entity.isBoundaryEntity() )
+      //if( ! processOnlyBoundaryEntities || entity.isBoundaryEntity() )
       {
          entity.refresh();
          EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
       }
    }
 }
+
+template< typename Real,
+          typename Index,
+          typename GridEntity,
+          typename UserData,
+          typename EntitiesProcessor >
+__global__ void 
+tnlGridBoundaryTraverser1D(
+   const tnlGrid< 1, Real, tnlCuda, Index >* grid,
+   UserData* userData,
+   const typename GridEntity::CoordinatesType* begin,
+   const typename GridEntity::CoordinatesType* end,
+   const typename GridEntity::CoordinatesType* entityOrientation,
+   const typename GridEntity::CoordinatesType* entityBasis )
+{
+   typedef Real RealType;
+   typedef Index IndexType;
+   typedef tnlGrid< 1, Real, tnlCuda, Index > GridType;
+   typename GridType::CoordinatesType coordinates;
+   
+   if( threadIdx.x == 0 )
+   {   
+      coordinates.x() = begin->x();
+      GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis );
+      entity.refresh();
+      EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
+   }
+   if( threadIdx.x == 1 )
+   {   
+      coordinates.x() = end->x();
+      GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis );
+      entity.refresh();
+      EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity );
+   }
+}
+
 #endif
 
 template< typename Real,           
@@ -128,21 +164,37 @@ processEntities(
    typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid );
    UserData* kernelUserData = tnlCuda::passToDevice( userData );
       
-   dim3 cudaBlockSize( 256 );
-   dim3 cudaBlocks;
-   cudaBlocks.x = tnlCuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
-   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
+   if( processOnlyBoundaryEntities )
+   {
+      dim3 cudaBlockSize( 2 );
+      dim3 cudaBlocks( 1 );
+      tnlGridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
+            <<< cudaBlocks, cudaBlockSize >>>
+            ( kernelGrid,
+              kernelUserData,
+              kernelBegin,
+              kernelEnd,
+              kernelEntityOrientation,
+              kernelEntityBasis );
+   }
+   else
+   {
+      dim3 cudaBlockSize( 256 );
+      dim3 cudaBlocks;
+      cudaBlocks.x = tnlCuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
+      const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
 
-   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
-      tnlGridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities >
-         <<< cudaBlocks, cudaBlockSize >>>
-         ( kernelGrid,
-           kernelUserData,
-           kernelBegin,
-           kernelEnd,
-           kernelEntityOrientation,
-           kernelEntityBasis,
-           gridXIdx );
+      for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+         tnlGridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
+            <<< cudaBlocks, cudaBlockSize >>>
+            ( kernelGrid,
+              kernelUserData,
+              kernelBegin,
+              kernelEnd,
+              kernelEntityOrientation,
+              kernelEntityBasis,
+              gridXIdx );
+   }
    cudaThreadSynchronize();
    checkCudaDevice;
    tnlCuda::freeFromDevice( kernelGrid );
@@ -172,8 +224,8 @@ void
 tnlGridTraverser< tnlGrid< 2, Real, tnlHost, Index > >::
 processEntities(
    const GridType& grid,
-   const CoordinatesType& begin,
-   const CoordinatesType& end,
+   const CoordinatesType begin,
+   const CoordinatesType end,
    const CoordinatesType& entityOrientation,
    const CoordinatesType& entityBasis,      
    UserData& userData )
@@ -211,6 +263,7 @@ processEntities(
    }
    else
    {
+//#pragma omp parallel for firstprivate( entity, begin, end ) if( tnlHost::isOMPEnabled() )      
       for( entity.getCoordinates().y() = begin.y();
            entity.getCoordinates().y() <= end.y();
            entity.getCoordinates().y() ++ )
@@ -314,6 +367,7 @@ processEntities(
               kernelEntityBasis,
               gridXIdx,
               gridYIdx );
+      
    cudaThreadSynchronize();
    checkCudaDevice;   
    tnlCuda::freeFromDevice( kernelGrid );
diff --git a/src/operators/CMakeLists.txt b/src/operators/CMakeLists.txt
index 87ce7309512ca0b3bbb1bad46a2655eb5b5e7dcf..3ab7e12a6678cd719ce4c1e33bf14bdf104ac72f 100755
--- a/src/operators/CMakeLists.txt
+++ b/src/operators/CMakeLists.txt
@@ -5,7 +5,6 @@ ADD_SUBDIRECTORY( operator-Q )
 ADD_SUBDIRECTORY( operator-curvature )
 
 SET( headers tnlDirichletBoundaryConditions.h
-             tnlDirichletBoundaryConditions_impl.h
              tnlExactFunctionInverseOperator.h
              tnlExactIdentityOperator.h
              tnlExactOperatorComposition.h
diff --git a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h
index af39c588118eb976eec6425cd2c9d068d530c5e5..9b16952d403e5b707a52c41983b40c4de4085034 100644
--- a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h
+++ b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h
@@ -52,7 +52,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 1,MeshReal, Device, MeshIndex >
              typename Vector,
              typename Matrix >
    __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
@@ -106,7 +106,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 2, MeshReal, Device, MeshIndex
              typename Vector,
              typename Matrix >
    __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
@@ -158,7 +158,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 3, MeshReal, Device, MeshIndex
              typename Vector,
              typename Matrix >
    __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
diff --git a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h
index 9bd82a6965d7253712ac51446a31577a95f3df22..2ab2cf18c7b373c460b2e9c566e9978c89bf716c 100644
--- a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h
+++ b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h
@@ -71,7 +71,7 @@ template< typename MeshEntity,
 __cuda_callable__
 void
 tnlFiniteVolumeNonlinearOperator< tnlGrid< 1, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -155,7 +155,7 @@ template< typename MeshEntity,
 __cuda_callable__
 void
 tnlFiniteVolumeNonlinearOperator< tnlGrid< 2, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -268,7 +268,7 @@ __cuda_callable__
 #endif
 void
 tnlFiniteVolumeNonlinearOperator< tnlGrid< 3, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
diff --git a/src/operators/diffusion/tnlExactMeanCurvature.h b/src/operators/diffusion/tnlExactMeanCurvature.h
index bb4b3d67fb798ad1c8d2a5605cea77c532528d12..19506f8382db826d52fa016399c194431854379d 100644
--- a/src/operators/diffusion/tnlExactMeanCurvature.h
+++ b/src/operators/diffusion/tnlExactMeanCurvature.h
@@ -44,7 +44,7 @@ class tnlExactMeanCurvature
       template< typename Real >
       void setRegularizationEpsilon( const Real& eps)
       {
-         nonlinearDiffusion.getNonlinearity().getInnerOperator().setRegularizationEpislon( eps );
+         nonlinearDiffusion.getNonlinearity().getInnerOperator().setRegularizationEpsilon( eps );
       }
       
       template< typename Function >
diff --git a/src/operators/diffusion/tnlExactNonlinearDiffusion.h b/src/operators/diffusion/tnlExactNonlinearDiffusion.h
index fafc9656d465a9293ea18a0e0902b3d20184d101..87403c37446355e7f0392ebcfe5f67a692f8371b 100644
--- a/src/operators/diffusion/tnlExactNonlinearDiffusion.h
+++ b/src/operators/diffusion/tnlExactNonlinearDiffusion.h
@@ -73,7 +73,7 @@ class tnlExactNonlinearDiffusion< 1, Nonlinearity, InnerOperator >
          const RealType u_xx = innerOperator.template getPartialDerivative< Function, 2, 0, 0 >( function, v, time );
          const RealType g = nonlinearity( function, v, time ); 
          const RealType g_x = nonlinearity.template getPartialDerivative< Function, 1, 0, 0 >( function, v, time );
-         return u_xx + u_x * g_x / g;          
+         return u_xx * g + u_x * g_x;          
       }
    
       protected:
@@ -131,7 +131,7 @@ class tnlExactNonlinearDiffusion< 2, Nonlinearity, InnerOperator >
          const RealType g_x = nonlinearity.template getPartialDerivative< Function, 1, 0, 0 >( function, v, time );
          const RealType g_y = nonlinearity.template getPartialDerivative< Function, 0, 1, 0 >( function, v, time );
 
-         return  u_xx + u_yy + ( g_x * u_x + g_y * u_y ) / g; 
+         return  ( u_xx + u_yy ) * g + g_x * u_x + g_y * u_y; 
       }
 
       protected:
@@ -193,7 +193,7 @@ class tnlExactNonlinearDiffusion< 3, Nonlinearity, InnerOperator >
          const RealType g_y = nonlinearity.template getPartialDerivative< Function, 0, 1, 0 >( function, v, time );
          const RealType g_z = nonlinearity.template getPartialDerivative< Function, 0, 0, 1 >( function, v, time );
 
-         return  u_xx + u_yy + u_zz + ( g_x * u_x + g_y * u_y + g_z * u_z ) / g; 
+         return ( u_xx + u_yy + u_zz ) * g + g_x * u_x + g_y * u_y + g_z * u_z; 
       }
       
       protected:
diff --git a/src/operators/diffusion/tnlLinearDiffusion.h b/src/operators/diffusion/tnlLinearDiffusion.h
index b4683676e673a985b0a233f5a3f016b9f8bf7f4e..9c63d19b641e79c4a8c282154e74273e1f1c80bd 100644
--- a/src/operators/diffusion/tnlLinearDiffusion.h
+++ b/src/operators/diffusion/tnlLinearDiffusion.h
@@ -55,12 +55,10 @@ class tnlLinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index
       
       static constexpr int getMeshDimensions() { return Dimensions; }
       
-      template< int EntityDimensions = Dimensions >
-      using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >;
-
       static tnlString getType();
 
-      template< typename PreimageFunction, typename MeshEntity >
+      template< typename PreimageFunction,
+                typename MeshEntity >
       __cuda_callable__
       inline Real operator()( const PreimageFunction& u,
                               const MeshEntity& entity,
@@ -72,19 +70,17 @@ class tnlLinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index
                                              const IndexType& index,
                                              const MeshEntity& entity ) const;
 
-      template< typename MeshEntity,
-                typename Vector,
-                typename MatrixRow >
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      inline void updateLinearSystem( const RealType& time,
-                                      const RealType& tau,
-                                      const MeshType& mesh,
-                                      const IndexType& index,
-                                      const MeshEntity& entity,
-                                      const MeshFunction< 1 >& u,
-                                      Vector& b,
-                                      MatrixRow& matrixRow ) const;
-
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const;
 };
 
 
@@ -110,10 +106,6 @@ class tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index
       
       static constexpr int getMeshDimensions() { return Dimensions; }
 
-      
-      template< int EntityDimensions = Dimensions >
-      using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >;      
-
       static tnlString getType();
 
       template< typename PreimageFunction, typename EntityType >
@@ -127,19 +119,18 @@ class tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index
       inline Index getLinearSystemRowLength( const MeshType& mesh,
                                              const IndexType& index,
                                              const EntityType& entity ) const;
-
-      template< typename Vector,
-                typename MatrixRow,
-                typename EntityType >
+      
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      inline void updateLinearSystem( const RealType& time,
-                                      const RealType& tau,
-                                      const MeshType& mesh,
-                                      const IndexType& index,
-                                      const EntityType& entity,
-                                      const MeshFunction< 2 >& u,
-                                      Vector& b,
-                                      MatrixRow& matrixRow ) const;
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const;
 };
 
 
@@ -165,10 +156,6 @@ class tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index
       
       static constexpr int getMeshDimensions() { return Dimensions; }      
 
-      template< int EntityDimensions = Dimensions >
-      using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >;
-
-      
       static tnlString getType();
 
       template< typename PreimageFunction, 
@@ -184,19 +171,17 @@ class tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index
                                              const IndexType& index,
                                              const EntityType& entity ) const;
 
-      template< typename Vector,
-                typename MatrixRow,
-                typename EntityType >
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      inline void updateLinearSystem( const RealType& time,
-                                      const RealType& tau,
-                                      const MeshType& mesh,
-                                      const IndexType& index,
-                                      const EntityType& entity,
-                                      const MeshFunction< 3 >& u,
-                                      Vector& b,
-                                      MatrixRow& matrixRow ) const;
-
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const;
 };
 
 
diff --git a/src/operators/diffusion/tnlLinearDiffusion_impl.h b/src/operators/diffusion/tnlLinearDiffusion_impl.h
index ef38d2e8b0f65119b6769fb778b9475df777ec4d..04e4177f6a1b3950912d70926fade6d7d17724a1 100644
--- a/src/operators/diffusion/tnlLinearDiffusion_impl.h
+++ b/src/operators/diffusion/tnlLinearDiffusion_impl.h
@@ -82,25 +82,27 @@ template< typename MeshReal,
           typename MeshIndex,
           typename Real,
           typename Index >
-   template< typename MeshEntity,
-             typename Vector, 
-             typename Matrix >
+   template< typename PreimageFunction,
+             typename MeshEntity,
+             typename Matrix,
+             typename Vector >
 __cuda_callable__
 inline
 void
 tnlLinearDiffusion< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
-                    const RealType& tau,
-                    const MeshType& mesh,
-                    const IndexType& index,
-                    const MeshEntity& entity,
-                    const MeshFunction< 1 >& u,
-                    Vector& b,
-                    Matrix& matrix ) const
+setMatrixElements( const PreimageFunction& u,
+                   const MeshEntity& entity,
+                   const RealType& time,
+                   const RealType& tau,
+                   Matrix& matrix,
+                   Vector& b ) const
 {
+   static_assert( MeshEntity::entityDimensions == 1, "Wrong mesh entity dimensions." );
+   static_assert( PreimageFunction::getEntitiesDimensions() == 1, "Wrong preimage function" );
    const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+   const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2 >();
+   const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2 >();
    matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(),      - lambdaX );
    matrixRow.setElement( 1, index,                                              2.0 * lambdaX );
    matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< 1 >(),       - lambdaX );   
@@ -171,26 +173,28 @@ template< typename MeshReal,
           typename MeshIndex,
           typename Real,
           typename Index >
-   template< typename Vector,
+   template< typename PreimageFunction,
+             typename MeshEntity,
              typename Matrix,
-             typename EntityType >
+             typename Vector >
 __cuda_callable__
 inline
 void
 tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
-                    const RealType& tau,
-                    const MeshType& mesh,
-                    const IndexType& index,
-                    const EntityType& entity,
-                    const MeshFunction< 2 >& u,
-                    Vector& b,
-                    Matrix& matrix ) const
+setMatrixElements( const PreimageFunction& u,
+                   const MeshEntity& entity,
+                   const RealType& time,
+                   const RealType& tau,
+                   Matrix& matrix,
+                   Vector& b ) const
 {
+   static_assert( MeshEntity::entityDimensions == 2, "Wrong mesh entity dimensions." );
+   static_assert( PreimageFunction::getEntitiesDimensions() == 2, "Wrong preimage function" );
+   const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2, 0 >();
-   const RealType lambdaY = tau * mesh.template getSpaceStepsProducts< 0, -2 >();
-   const typename EntityType::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+   const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
+   const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
+   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
    matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -lambdaY );
    matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< -1, 0 >(), -lambdaX );
    matrixRow.setElement( 2, index,                                                        2.0 * ( lambdaX + lambdaY ) );
@@ -266,27 +270,29 @@ template< typename MeshReal,
           typename MeshIndex,
           typename Real,
           typename Index >
-   template< typename Vector,
+   template< typename PreimageFunction,
+             typename MeshEntity,
              typename Matrix,
-             typename EntityType >
+             typename Vector >
 __cuda_callable__
 inline
 void
 tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
-                    const RealType& tau,
-                    const MeshType& mesh,
-                    const IndexType& index,
-                    const EntityType& entity,
-                    const MeshFunction< 3 >& u,
-                    Vector& b,
-                    Matrix& matrix ) const
+setMatrixElements( const PreimageFunction& u,
+                   const MeshEntity& entity,
+                   const RealType& time,
+                   const RealType& tau,
+                   Matrix& matrix,
+                   Vector& b ) const
 {
-   const typename EntityType::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   static_assert( MeshEntity::entityDimensions == 3, "Wrong mesh entity dimensions." );
+   static_assert( PreimageFunction::getEntitiesDimensions() == 3, "Wrong preimage function" );   
+   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2, 0, 0 >();
-   const RealType lambdaY = tau * mesh.template getSpaceStepsProducts< 0, -2, 0 >();
-   const RealType  lambdaZ = tau * mesh.template getSpaceStepsProducts< 0, 0, -2 >();
+   const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >();
+   const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >();
+   const RealType lambdaZ = tau * entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >();
    matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -lambdaZ );
    matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -lambdaY );
    matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -lambdaX );
diff --git a/src/operators/diffusion/tnlNonlinearDiffusion_impl.h b/src/operators/diffusion/tnlNonlinearDiffusion_impl.h
index 88f0bae7ea1b840b24c3f37e73f5a2ba4874a69d..be5e87d68c57ce72093beb484aacb0ed729b070d 100644
--- a/src/operators/diffusion/tnlNonlinearDiffusion_impl.h
+++ b/src/operators/diffusion/tnlNonlinearDiffusion_impl.h
@@ -71,7 +71,7 @@ template< typename MeshEntity,
 __cuda_callable__
 void
 tnlNonlinearDiffusion< tnlGrid< 1, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -80,7 +80,7 @@ updateLinearSystem( const RealType& time,
                     Vector& b,
                     Matrix& matrix ) const
 {
-    nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix );
+    nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
 }
 
 template< typename MeshReal,
@@ -148,7 +148,7 @@ template< typename MeshEntity,
 __cuda_callable__
 void
 tnlNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -157,7 +157,7 @@ updateLinearSystem( const RealType& time,
                     Vector& b,
                     Matrix& matrix ) const
 {
-    nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix );
+    nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
 }
 
 template< typename MeshReal,
@@ -225,7 +225,7 @@ template< typename MeshEntity,
 __cuda_callable__
 void
 tnlNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -234,7 +234,7 @@ updateLinearSystem( const RealType& time,
                     Vector& b,
                     Matrix& matrix ) const
 {
-    nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix );
+    nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
 }
 
 #endif	/* TNLNONLINEARDIFFUSION_IMPL_H */
diff --git a/src/operators/diffusion/tnlOneSidedMeanCurvature.h b/src/operators/diffusion/tnlOneSidedMeanCurvature.h
index 87386d89d8fe0145235e6b6c9f09b9226f536bbb..a75f0f855257f3bdb8b8665b1c5c46bb53241579 100644
--- a/src/operators/diffusion/tnlOneSidedMeanCurvature.h
+++ b/src/operators/diffusion/tnlOneSidedMeanCurvature.h
@@ -106,7 +106,7 @@ class tnlOneSidedMeanCurvature
                 typename Vector,
                 typename Matrix >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
@@ -115,7 +115,7 @@ class tnlOneSidedMeanCurvature
                                Vector& b,
                                Matrix& matrix ) const
       {
-         this->nonlinearDiffusion.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix );
+         this->nonlinearDiffusion.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix );
       }            
       
    protected:      
diff --git a/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h b/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h
index 1eb186f934d4d7b6f705a53ec2518d11700a9715..e6f62e3edfda873916198ce45f8bcec8609361b2 100644
--- a/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h
+++ b/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h
@@ -71,7 +71,7 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, N
       {
          const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
-         const RealType& hx_div = mesh.template getSpaceStepsProducts< -2 >();
+         const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2 >();
          const IndexType& center = entity.getIndex();
          const IndexType& east = neighbourEntities.template getEntityIndex<  1 >();
          const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
@@ -92,26 +92,24 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, N
          return 3;
       }
 
-      template< typename MeshEntity,
-                typename MeshFunction,
-                typename Vector,
-                typename Matrix >
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunction& u,
-                               Vector& b,
-                               Matrix& matrix ) const
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const
       {
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
          const IndexType& center = entity.getIndex();
          const IndexType& east = neighbourEntities.template getEntityIndex<  1 >();
          const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
-         const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2 >();
+         const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2 >();
          const RealType& nonlinearity_center = this->nonlinearity[ center ];
          const RealType& nonlinearity_west = this->nonlinearity[ west ];
          const RealType aCoef = -lambda_x * nonlinearity_west;
@@ -167,8 +165,8 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >,
       {
          const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
-         const RealType& hx_div = mesh.template getSpaceStepsProducts< -2,  0 >();
-         const RealType& hy_div = mesh.template getSpaceStepsProducts<  0, -2 >();
+         const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2,  0 >();
+         const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts<  0, -2 >();
          const IndexType& center = entity.getIndex();
          const IndexType& east = neighbourEntities.template getEntityIndex<  1, 0 >();
          const IndexType& west = neighbourEntities.template getEntityIndex< -1, 0 >();
@@ -194,19 +192,17 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >,
          return 5;
       }
 
-      template< typename MeshEntity,
-                typename MeshFunction,
-                typename Vector,
-                typename Matrix >
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunction& u,
-                               Vector& b,
-                               Matrix& matrix ) const
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const
       {
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
@@ -215,8 +211,8 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >,
          const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >();
          const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >();
          const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();                  
-         const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2,  0 >();
-         const RealType lambda_y = tau * mesh.template getSpaceStepsProducts<  0, -2 >();
+         const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2,  0 >();
+         const RealType lambda_y = tau * entity.getMesh().template getSpaceStepsProducts<  0, -2 >();
          const RealType& nonlinearity_center = this->nonlinearity[ center ];
          const RealType& nonlinearity_west = this->nonlinearity[ west ];
          const RealType& nonlinearity_south = this->nonlinearity[ south ];
@@ -278,9 +274,9 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >,
       {
          const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
-         const RealType& hx_div = mesh.template getSpaceStepsProducts< -2,  0,  0 >();
-         const RealType& hy_div = mesh.template getSpaceStepsProducts<  0, -2,  0 >();
-         const RealType& hz_div = mesh.template getSpaceStepsProducts<  0,  0, -2 >();
+         const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >();
+         const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >();
+         const RealType& hz_div = entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >();
          const IndexType& center = entity.getIndex();
          const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >();
          const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >();
@@ -313,19 +309,17 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >,
          return 7;
       }
 
-      template< typename MeshEntity,
-                typename MeshFunction,
-                typename Vector,                
-                typename Matrix >
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const RealType& tau,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const MeshEntity& entity,
-                               const MeshFunction& u,
-                               Vector& b,
-                               Matrix& matrix ) const
+      inline void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const
       {
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
@@ -338,9 +332,9 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >,
          const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();                  
          
          
-         const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2,  0,  0 >();
-         const RealType lambda_y = tau * mesh.template getSpaceStepsProducts<  0, -2,  0 >();
-         const RealType lambda_z = tau * mesh.template getSpaceStepsProducts<  0,  0, -2 >();
+         const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >();
+         const RealType lambda_y = tau * entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >();
+         const RealType lambda_z = tau * entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >();
          const RealType& nonlinearity_center = this->nonlinearity[ center ];
          const RealType& nonlinearity_west   = this->nonlinearity[ west ];
          const RealType& nonlinearity_south  = this->nonlinearity[ south ];
diff --git a/src/operators/geometric/tnlExactGradientNorm.h b/src/operators/geometric/tnlExactGradientNorm.h
index 42af00e87e9d86fa80d3fdbc5fed18237c5a6df3..1b03c3cf12670d7a3826be7f70adc62908939a31 100644
--- a/src/operators/geometric/tnlExactGradientNorm.h
+++ b/src/operators/geometric/tnlExactGradientNorm.h
@@ -45,7 +45,7 @@ class tnlExactGradientNorm< 1, Real >
       tnlExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
-      void setRegularization( const Real& epsilon )
+      void setRegularizationEpsilon( const Real& epsilon )
       {
          this->epsilonSquare = epsilon*epsilon;
       }
@@ -113,7 +113,7 @@ class tnlExactGradientNorm< 2, Real >
       tnlExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
-      void setRegularization( const Real& epsilon )
+      void setRegularizationEpsilon( const Real& epsilon )
       {
          this->epsilonSquare = epsilon*epsilon;
       }
@@ -187,7 +187,7 @@ class tnlExactGradientNorm< 3, Real >
       tnlExactGradientNorm()
       : epsilonSquare( 0.0 ){};
 
-      void setRegularization( const Real& epsilon )
+      void setRegularizationEpsilon( const Real& epsilon )
       {
          this->epsilonSquare = epsilon*epsilon;
       }
diff --git a/src/operators/tnlDirichletBoundaryConditions.h b/src/operators/tnlDirichletBoundaryConditions.h
index 01f55a1e8e6861f2db5d0a8f6458acbd70ee284b..b155bb7d964cd2d1c98bccb24470c79d69c6ecff 100644
--- a/src/operators/tnlDirichletBoundaryConditions.h
+++ b/src/operators/tnlDirichletBoundaryConditions.h
@@ -20,6 +20,7 @@
 
 #include <operators/tnlOperator.h>
 #include <functions/tnlConstantFunction.h>
+#include <functions/tnlFunctionAdapter.h>
 
 template< typename Mesh,
           typename Function = tnlConstantFunction< Mesh::getMeshDimensions(), typename Mesh::RealType >,
@@ -36,57 +37,86 @@ class tnlDirichletBoundaryConditions
 {
    public:
 
-   typedef Mesh MeshType;
-   typedef Function FunctionType;
-   typedef Real RealType;
-   typedef typename MeshType::DeviceType DeviceType;
-   typedef Index IndexType;
-  
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef typename MeshType::VertexType VertexType;
-   
-   static constexpr int getMeshDimensions() { return MeshType::meshDimensions; }
+      typedef Mesh MeshType;
+      typedef Function FunctionType;
+      typedef Real RealType;
+      typedef typename MeshType::DeviceType DeviceType;
+      typedef Index IndexType;
 
-   static void configSetup( tnlConfigDescription& config,
-                            const tnlString& prefix = "" );
+      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef typename MeshType::VertexType VertexType;
 
-   bool setup( const tnlParameterContainer& parameters,
-               const tnlString& prefix = "" );
+      static constexpr int getMeshDimensions() { return MeshType::meshDimensions; }
 
-   void setFunction( const Function& function );
-   
-   Function& getFunction();
+      static void configSetup( tnlConfigDescription& config,
+                               const tnlString& prefix = "" )
+      {
+         Function::configSetup( config, prefix );
+      }
+      
+      bool setup( const tnlParameterContainer& parameters,
+                  const tnlString& prefix = "" )
+      {
+         return this->function.setup( parameters, prefix );
+      }
 
-   const Function& getFunction() const;
-   
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,                            
-                              const RealType& time = 0 ) const;
+      void setFunction( const Function& function )
+      {
+         this->function = function;
+      }
+
+      Function& getFunction()
+      {
+         return this->function;
+      }
+      
+      const Function& getFunction() const
+      {
+         return this->function;
+      }
+
+      template< typename EntityType,
+                typename MeshFunction >
+      __cuda_callable__
+      const RealType operator()( const MeshFunction& u,
+                                 const EntityType& entity,                            
+                                 const RealType& time = 0 ) const
+      {
+         //static_assert( EntityType::getDimensions() == MeshEntitiesDimensions, "Wrong mesh entity dimensions." );
+         return tnlFunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time );
+      }
+
+      template< typename EntityType >
+      __cuda_callable__
+      IndexType getLinearSystemRowLength( const MeshType& mesh,
+                                          const IndexType& index,
+                                          const EntityType& entity ) const
+      {
+         return 1;
+      }
+
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
+      __cuda_callable__
+      void setMatrixElements( const PreimageFunction& u,
+                              const MeshEntity& entity,
+                              const RealType& time,
+                              const RealType& tau,
+                              Matrix& matrix,
+                              Vector& b ) const
+      {
+         typename Matrix::MatrixRow matrixRow = matrix.getRow( entity.getIndex() );
+         const IndexType& index = entity.getIndex();
+         matrixRow.setElement( 0, index, 1.0 );
+         b[ index ] = tnlFunctionAdapter< MeshType, Function >::getValue( this->function, entity, time );
+      }
    
-   template< typename EntityType >
-   __cuda_callable__
-   IndexType getLinearSystemRowLength( const MeshType& mesh,
-                                       const IndexType& index,
-                                       const EntityType& entity ) const;
-
-   template< typename MatrixRow,
-             typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const EntityType& entity,
-                               const MeshFunction& u,
-                               DofVectorType& b,
-                               MatrixRow& matrixRow ) const;
 
    protected:
 
-   Function function;
+      Function function;
    
    //static_assert( Device::DeviceType == Function::Device::DeviceType );
 };
@@ -99,6 +129,4 @@ ostream& operator << ( ostream& str, const tnlDirichletBoundaryConditions< Mesh,
    return str;
 }
 
-#include <operators/tnlDirichletBoundaryConditions_impl.h>
-
 #endif /* TNLDIRICHLETBOUNDARYCONDITIONS_H_ */
diff --git a/src/operators/tnlDirichletBoundaryConditions_impl.h b/src/operators/tnlDirichletBoundaryConditions_impl.h
deleted file mode 100644
index f78db6237a04fe3ac07afa66497c15237dc9a947..0000000000000000000000000000000000000000
--- a/src/operators/tnlDirichletBoundaryConditions_impl.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/***************************************************************************
-                          tnlDirichletBoundaryConditions_impl.h  -  description
-                             -------------------
-    begin                : Nov 17, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_
-#define TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_
-
-#include <functions/tnlFunctionAdapter.h>
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-void
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-configSetup( tnlConfigDescription& config,
-             const tnlString& prefix )
-{
-   Function::configSetup( config, prefix );
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-bool
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-setup( const tnlParameterContainer& parameters,
-       const tnlString& prefix )
-{
-   return this->function.setup( parameters, prefix );
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-void
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-setFunction( const Function& function )
-{
-   this->function = function;
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-Function&
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-getFunction()
-{
-   return this->function;
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-const Function&
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-getFunction() const
-{
-   return *this->function;
-}
-
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-template< typename EntityType,
-          typename MeshFunction >
-__cuda_callable__
-const Real
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-operator()( const MeshFunction& u,
-            const EntityType& entity,            
-            const RealType& time ) const
-{
-   //static_assert( EntityType::getDimensions() == MeshEntitiesDimensions, "Wrong mesh entity dimensions." );
-   return tnlFunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time );
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-   template< typename EntityType >
-__cuda_callable__
-Index
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-getLinearSystemRowLength( const MeshType& mesh,
-                          const IndexType& index,
-                          const EntityType& entity ) const
-{
-   return 1;
-}
-
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimensions,
-          typename Real,
-          typename Index >
-   template< typename Matrix,
-             typename EntityType,
-             typename MeshFunction >
-__cuda_callable__
-void
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >::
-updateLinearSystem( const RealType& time,
-                    const MeshType& mesh,
-                    const IndexType& index,
-                    const EntityType& entity,
-                    const MeshFunction& u,
-                    DofVectorType& b,
-                    Matrix& matrix ) const
-{
-   typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   matrixRow.setElement( 0, index, 1.0 );
-   b[ index ] = tnlFunctionAdapter< MeshType, Function >::getValue( this->function, entity, time );
-}
-
-#endif /* TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_ */
diff --git a/src/operators/tnlNeumannBoundaryConditions.h b/src/operators/tnlNeumannBoundaryConditions.h
index 7f6e3e15364a1eaf92196e158fd48a8bdd189553..dabef95f9893e768b75ac842b74a63f402d30be2 100644
--- a/src/operators/tnlNeumannBoundaryConditions.h
+++ b/src/operators/tnlNeumannBoundaryConditions.h
@@ -1,6 +1,8 @@
 #ifndef TNLNEUMANNBOUNDARYCONDITIONS_H
 #define	TNLNEUMANNBOUNDARYCONDITIONS_H
 
+#include <functions/tnlFunctionAdapter.h>
+
 template< typename Mesh,
           typename Function,
           typename Real = typename Mesh::RealType,
@@ -74,26 +76,60 @@ class tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, F
    __cuda_callable__
    const RealType operator()( const MeshFunction& u,
                               const EntityType& entity,   
-                              const RealType& time = 0 ) const;
+                              const RealType& time = 0 ) const
+   {
+      const MeshType& mesh = entity.getMesh();
+      const auto& neighbourEntities = entity.getNeighbourEntities();
+      const IndexType& index = entity.getIndex();
+      if( entity.getCoordinates().x() == 0 )
+         return u[ neighbourEntities.template getEntityIndex< 1 >() ] - entity.getMesh().getSpaceSteps().x() * 
+            tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+      else
+         return u[ neighbourEntities.template getEntityIndex< -1 >() ] + entity.getMesh().getSpaceSteps().x() * 
+            tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );   
+
+   }
 
 
    template< typename EntityType >
    __cuda_callable__
    Index getLinearSystemRowLength( const MeshType& mesh,
                                    const IndexType& index,
-                                   const EntityType& entity ) const;
-
-   template< typename MatrixRow,
-             typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const EntityType& entity,
-                               const MeshFunction& u,
-                               DofVectorType& b,
-                               MatrixRow& matrixRow ) const;
+                                   const EntityType& entity ) const
+   {
+      return 2;
+   }
+
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
+      __cuda_callable__
+      void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const
+      {
+         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const IndexType& index = entity.getIndex();
+         typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
+         if( entity.getCoordinates().x() == 0 )
+         {
+            matrixRow.setElement( 0, index, 1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().x() * 
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         else
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(), -1.0 );
+            matrixRow.setElement( 1, index, 1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }         
+      }
 };
 
 /****
@@ -116,43 +152,104 @@ class tnlNeumannBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, F
 {
    public:
 
-   typedef tnlGrid< 2, MeshReal, Device, MeshIndex > MeshType;
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-
-   typedef Function FunctionType;
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef tnlStaticVector< 2, RealType > VertexType;
-   typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType;
-   typedef tnlNeumannBoundaryConditionsBase< Function > BaseType;
-   
-
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,                            
-                              const RealType& time = 0 ) const;
-      
-   template< typename EntityType >
-   __cuda_callable__
-   Index getLinearSystemRowLength( const MeshType& mesh,
-                                   const IndexType& index,
-                                   const EntityType& entity ) const;
+      typedef tnlGrid< 2, MeshReal, Device, MeshIndex > MeshType;
+      typedef Real RealType;
+      typedef Device DeviceType;
+      typedef Index IndexType;
 
-   template< typename MatrixRow,
-             typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const EntityType& entity,
-                               const MeshFunction& u,
-                               DofVectorType& b,
-                               MatrixRow& matrixRow ) const;
+      typedef Function FunctionType;
+      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef tnlStaticVector< 2, RealType > VertexType;
+      typedef typename MeshType::CoordinatesType CoordinatesType;
+      typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType;
+      typedef tnlNeumannBoundaryConditionsBase< Function > BaseType;
+
+
+      template< typename EntityType,
+                typename MeshFunction >
+      __cuda_callable__
+      const RealType operator()( const MeshFunction& u,
+                                 const EntityType& entity,                            
+                                 const RealType& time = 0 ) const
+      {
+         const MeshType& mesh = entity.getMesh();
+         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const IndexType& index = entity.getIndex();
+         if( entity.getCoordinates().x() == 0 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] - entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == 0 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] - entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] + entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }         
+      }
+
+      template< typename EntityType >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const EntityType& entity ) const
+      {
+         return 2;
+      }
+
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
+      __cuda_callable__
+      void setMatrixElements( const PreimageFunction& u,
+                              const MeshEntity& entity,
+                              const RealType& time,
+                              const RealType& tau,
+                              Matrix& matrix,
+                              Vector& b ) const
+      {
+         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const IndexType& index = entity.getIndex();
+         typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
+         if( entity.getCoordinates().x() == 0 )
+         {
+            matrixRow.setElement( 0, index,                                                1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0 >(), -1.0 );
+            matrixRow.setElement( 1, index,                                                 1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == 0 )
+         {
+            matrixRow.setElement( 0, index,                                                1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -1.0 );
+            matrixRow.setElement( 1, index,                                                 1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }         
+      }
 };
 
 /****
@@ -174,43 +271,128 @@ class tnlNeumannBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, F
 {
    public:
 
-   typedef tnlGrid< 3, MeshReal, Device, MeshIndex > MeshType;
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-
-   typedef Function FunctionType;
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef tnlStaticVector< 3, RealType > VertexType;
-   typedef typename MeshType::CoordinatesType CoordinatesType;
-   typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType;
-   typedef tnlNeumannBoundaryConditionsBase< Function > BaseType;   
-
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,
-                              const RealType& time = 0 ) const;
-   
-
-   template< typename EntityType >
-   __cuda_callable__
-   Index getLinearSystemRowLength( const MeshType& mesh,
-                                   const IndexType& index,
-                                   const EntityType& entity ) const;
+      typedef tnlGrid< 3, MeshReal, Device, MeshIndex > MeshType;
+      typedef Real RealType;
+      typedef Device DeviceType;
+      typedef Index IndexType;
 
-   template< typename MatrixRow,
-             typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-      void updateLinearSystem( const RealType& time,
-                               const MeshType& mesh,
-                               const IndexType& index,
-                               const EntityType& entity,
-                               const MeshFunction& u,
-                               DofVectorType& b,
-                               MatrixRow& matrixRow ) const;
+      typedef Function FunctionType;
+      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef tnlStaticVector< 3, RealType > VertexType;
+      typedef typename MeshType::CoordinatesType CoordinatesType;
+      typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType;
+      typedef tnlNeumannBoundaryConditionsBase< Function > BaseType;   
+
+      template< typename EntityType,
+                typename MeshFunction >
+      __cuda_callable__
+      const RealType operator()( const MeshFunction& u,
+                                 const EntityType& entity,
+                                 const RealType& time = 0 ) const
+      {
+         const MeshType& mesh = entity.getMesh();
+         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const IndexType& index = entity.getIndex();
+         if( entity.getCoordinates().x() == 0 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] - entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == 0 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] - entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] + entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().z() == 0 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] - entity.getMesh().getSpaceSteps().z() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 )
+         {
+            return u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] + entity.getMesh().getSpaceSteps().z() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }   
+      }
+
+
+      template< typename EntityType >
+      __cuda_callable__
+      Index getLinearSystemRowLength( const MeshType& mesh,
+                                      const IndexType& index,
+                                      const EntityType& entity ) const
+      {
+         return 2;
+      }
+
+      template< typename PreimageFunction,
+                typename MeshEntity,
+                typename Matrix,
+                typename Vector >
+      __cuda_callable__
+      void setMatrixElements( const PreimageFunction& u,
+                                     const MeshEntity& entity,
+                                     const RealType& time,
+                                     const RealType& tau,
+                                     Matrix& matrix,
+                                     Vector& b ) const
+      {
+         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const IndexType& index = entity.getIndex();
+         typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
+         if( entity.getCoordinates().x() == 0 )
+         {
+            matrixRow.setElement( 0, index,                                                   1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0, 0 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -1.0 );
+            matrixRow.setElement( 1, index,                                                    1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().x() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == 0 )
+         {
+            matrixRow.setElement( 0, index,                                                   1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1, 0 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().y() * 
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -1.0 );
+            matrixRow.setElement( 1, index,                                                    1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().y() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().z() == 0 )
+         {
+            matrixRow.setElement( 0, index,                                                   1.0 );
+            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 0, 1 >(), -1.0 );
+            b[ index ] = - entity.getMesh().getSpaceSteps().z() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+         if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 )
+         {
+            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -1.0 );
+            matrixRow.setElement( 1, index,                                                    1.0 );
+            b[ index ] = entity.getMesh().getSpaceSteps().z() *
+               tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
+         }
+      }
 };
 
 template< typename Mesh,
diff --git a/src/operators/tnlNeumannBoundaryConditions_impl.h b/src/operators/tnlNeumannBoundaryConditions_impl.h
index adb01b87ee245a2978f2a230a11db09081103261..a9c39d0722183b4d0262e584bf35b8afd2611af4 100644
--- a/src/operators/tnlNeumannBoundaryConditions_impl.h
+++ b/src/operators/tnlNeumannBoundaryConditions_impl.h
@@ -49,7 +49,7 @@ getFunction() const
 /****
  * 1D grid
  */
-
+/*
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -106,7 +106,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const MeshType& mesh,
                     const IndexType& index,
                     const EntityType& entity,
@@ -130,11 +130,12 @@ updateLinearSystem( const RealType& time,
       b[ index ] = mesh.getSpaceSteps().x() *
          tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
    }
-}
+}*/
 
 /****
  * 2D grid
  */
+/*
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -204,7 +205,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 tnlNeumannBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const MeshType& mesh,
                     const IndexType& index,
                     const EntityType& entity,
@@ -242,11 +243,12 @@ updateLinearSystem( const RealType& time,
       b[ index ] = mesh.getSpaceSteps().y() *
          tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
    }
-}
+}*/
 
 /****
  * 3D grid
  */
+/*
 template< typename MeshReal,
           typename Device,
           typename MeshIndex,
@@ -326,7 +328,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 tnlNeumannBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const MeshType& mesh,
                     const IndexType& index,
                     const EntityType& entity,                    
@@ -379,6 +381,7 @@ updateLinearSystem( const RealType& time,
          tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
    }
 }
+*/
 
 #endif	/* TNLNEUMANNBOUNDARYCONDITIONS_IMPL_H */
 
diff --git a/src/problems/tnlHeatEquationProblem.h b/src/problems/tnlHeatEquationProblem.h
index 704f25ca27fc749d388cb2d50495d576e64b0d57..c47cf4072376d4ae668670994f0c4e851ccc7341 100644
--- a/src/problems/tnlHeatEquationProblem.h
+++ b/src/problems/tnlHeatEquationProblem.h
@@ -29,6 +29,7 @@
 #include <operators/diffusion/tnlLinearDiffusion.h>
 #include <matrices/tnlEllpackMatrix.h>
 #include <functions/tnlMeshFunction.h>
+#include <core/tnlTimer.h>
 
 template< typename Mesh,
           typename BoundaryCondition,
@@ -59,6 +60,9 @@ class tnlHeatEquationProblem : public tnlPDEProblem< Mesh,
 
       void writeProlog( tnlLogger& logger,
                         const tnlParameterContainer& parameters ) const;
+      
+      bool writeEpilog( tnlLogger& logger );
+
 
       bool setup( const tnlParameterContainer& parameters );
 
@@ -108,6 +112,8 @@ class tnlHeatEquationProblem : public tnlPDEProblem< Mesh,
          BoundaryCondition boundaryCondition;
 
          RightHandSide rightHandSide;
+         
+         tnlTimer gpuTransferTimer;
 };
 
 #include <problems/tnlHeatEquationProblem_impl.h>
diff --git a/src/problems/tnlHeatEquationProblem_impl.h b/src/problems/tnlHeatEquationProblem_impl.h
index ce68ff2b4cf1b40846cff2931af31f4aa38fbdc5..11f1f8569ed90539d3d5c2f8378c37d92470b97c 100644
--- a/src/problems/tnlHeatEquationProblem_impl.h
+++ b/src/problems/tnlHeatEquationProblem_impl.h
@@ -68,6 +68,19 @@ writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const
 {
 }
 
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename DifferentialOperator >
+bool
+tnlHeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
+writeEpilog( tnlLogger& logger )
+{
+   logger.writeParameter< const char* >( "GPU transfer time:", "" );
+   this->gpuTransferTimer.writeLog( logger, 1 );
+   return true;
+}
+
 template< typename Mesh,
           typename BoundaryCondition,
           typename RightHandSide,
@@ -76,6 +89,7 @@ bool
 tnlHeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
 setup( const tnlParameterContainer& parameters )
 {
+   this->gpuTransferTimer.reset();
    if( ! this->boundaryCondition.setup( parameters, "boundary-conditions-" ) ||
        ! this->rightHandSide.setup( parameters, "right-hand-side-" ) )
       return false;
@@ -207,6 +221,7 @@ getExplicitRHS( const RealType& time,
    this->bindDofs( mesh, uDofs );
    MeshFunctionType fu( mesh, fuDofs );
    tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
+   explicitUpdater.setGPUTransferTimer( this->gpuTransferTimer ); 
    explicitUpdater.template update< typename Mesh::Cell >( 
       time,
       mesh,
diff --git a/src/problems/tnlPDEProblem.h b/src/problems/tnlPDEProblem.h
index 7be3d7f0a161a2c0a7295961c73c4d94bad6bb16..11f2a7cab0c581bb82f653fc783a5c75420a1eb2 100644
--- a/src/problems/tnlPDEProblem.h
+++ b/src/problems/tnlPDEProblem.h
@@ -50,6 +50,9 @@ class tnlPDEProblem : public tnlProblem< Real, Device, Index >
 
       void writeProlog( tnlLogger& logger,
                         const tnlParameterContainer& parameters ) const;
+      
+      bool writeEpilog( tnlLogger& logger ) const;
+
 
       bool setMeshDependentData( const MeshType& mesh,
                                  MeshDependentDataType& meshDependentData );
@@ -62,6 +65,11 @@ class tnlPDEProblem : public tnlProblem< Real, Device, Index >
                        const MeshType& mesh,
                        DofVectorType& dofs,
                        MeshDependentDataType& meshDependentData );
+      
+      void setExplicitBoundaryConditions( const RealType& time,
+                                          const MeshType& mesh,
+                                          DofVectorType& dofs,
+                                          MeshDependentDataType& meshDependentData );
 
       bool postIterate( const RealType& time,
                         const RealType& tau,
diff --git a/src/problems/tnlPDEProblem_impl.h b/src/problems/tnlPDEProblem_impl.h
index 9954e2b1f4294c348fd4f197f7e3dfb6e08f273d..0955ff0c3db72725098413c15b57c4ec88d29da8 100644
--- a/src/problems/tnlPDEProblem_impl.h
+++ b/src/problems/tnlPDEProblem_impl.h
@@ -54,6 +54,18 @@ writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const
 {
 }
 
+template< typename Mesh,
+          typename Real,
+          typename Device,
+          typename Index >
+bool
+tnlPDEProblem< Mesh, Real, Device, Index >::
+writeEpilog( tnlLogger& logger ) const
+{
+   return true;
+}
+
+
 template< typename Mesh,
           typename Real,
           typename Device,
@@ -95,6 +107,20 @@ preIterate( const RealType& time,
    return true;
 }
 
+template< typename Mesh,
+          typename Real,
+          typename Device,
+          typename Index >
+void
+tnlPDEProblem< Mesh, Real, Device, Index >::
+setExplicitBoundaryConditions( const RealType& time,
+                               const MeshType& mesh,
+                               DofVectorType& dofs,
+                               MeshDependentDataType& meshDependentData )
+{   
+}
+
+
 template< typename Mesh,
           typename Real,
           typename Device,
diff --git a/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h b/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h
index ee42d6b199160c5b1474e03ff1a9e48882ead691..09021f4d2beccdb396464c41525623d3ea135576 100644
--- a/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h
+++ b/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h
@@ -323,7 +323,7 @@ void tnlNavierStokesSolver< AdvectionScheme,
       const IndexType size = dofs_rho.getSize();
 
       #ifdef HAVE_OPENMP
-      #pragma omp parallel for, if( tnlOmp::isEnabled() )
+      #pragma omp parallel for, if( tnlHost::isOMPEnabled() )
       #endif
       for( IndexType c = 0; c < size; c++ )
          {
@@ -447,7 +447,7 @@ void tnlNavierStokesSolver< AdvectionScheme,
    writePhysicalVariables( time, -4 );
 
 #ifdef HAVE_OPENMP
-  #pragma omp parallel for, if( tnlOmp::isEnabled() )
+  #pragma omp parallel for, if( tnlHost::isOMPEnabled() )
   #endif
   for( IndexType j = 0; j < ySize; j ++ )
      for( IndexType i = 0; i < xSize; i ++ )
diff --git a/src/solvers/linear/krylov/tnlTFQMRSolver.h b/src/solvers/linear/krylov/tnlTFQMRSolver.h
index ab25f3e349e752bdac403a2323fada4fe34aa30e..32d250f52a18ecf23f961dabc04b1c5a0b89e1e4 100644
--- a/src/solvers/linear/krylov/tnlTFQMRSolver.h
+++ b/src/solvers/linear/krylov/tnlTFQMRSolver.h
@@ -75,7 +75,9 @@ class tnlTFQMRSolver : public tnlObject,
 
    bool setSize( IndexType size );
 
-   tnlVector< RealType, Device, IndexType >  d, r, w, u, v, r_ast, Au;
+   tnlVector< RealType, Device, IndexType >  d, r, w, u, v, r_ast, Au, M_tmp;
+
+   IndexType size;
 
    const MatrixType* matrix;
    const PreconditionerType* preconditioner;
diff --git a/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h b/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h
index e2f7d210d3ea93e4b257305f8079adc1c1b079a9..72dd40e313478ca3e6dcb53a8bd76fa93fb9f2ed 100644
--- a/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h
+++ b/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h
@@ -18,17 +18,12 @@
 #ifndef tnlTFQMRSolver_implH
 #define tnlTFQMRSolver_implH
 
-template< typename RealType,
-          typename Vector >
-RealType computeTFQMRNewP( Vector& p,
-                           const Vector&r,
-                           const RealType& beta,
-                           const RealType& omega,
-                           const Vector& Ap );
-
 template< typename Matrix,
           typename Preconditioner >
 tnlTFQMRSolver< Matrix, Preconditioner > :: tnlTFQMRSolver()
+: size( 0 ),
+  matrix( 0 ),
+  preconditioner( 0 )
 {
 }
 
@@ -83,15 +78,29 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector&
    dbgFunctionName( "tnlTFQMRSolver", "Solve" );
    if( ! this->setSize( matrix -> getRows() ) ) return false;
 
-   RealType tau, theta, eta, rho, alpha, w_norm;
-   RealType b_norm = b. lpNorm( 2.0 );
-   if( b_norm == 0.0 )
-       b_norm = 1.0;
+   RealType tau, theta, eta, rho, alpha, b_norm, w_norm;
 
-   this->matrix -> vectorProduct( x, r );
-   r. addVector( b, 1.0, -1.0 );
+   if( preconditioner ) {
+      preconditioner -> solve( b, M_tmp );
+      b_norm = M_tmp. lpNorm( ( RealType ) 2.0 );
+
+      matrix -> vectorProduct( x, M_tmp );
+      M_tmp.addVector( b, 1.0, -1.0 );
+      preconditioner -> solve( M_tmp, r );
+   }
+   else {
+      b_norm = b. lpNorm( 2.0 );
+      matrix -> vectorProduct( x, r );
+      r.addVector( b, 1.0, -1.0 );
+   }
    w = u = r;
-   matrix -> vectorProduct( u, Au );
+   if( preconditioner ) {
+      matrix -> vectorProduct( u, M_tmp );
+      preconditioner -> solve( M_tmp, Au );
+   }
+   else {
+      matrix -> vectorProduct( u, Au );
+   }
    v = Au;
    d. setValue( 0.0 );
    tau = r. lpNorm( 2.0 );
@@ -101,6 +110,9 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector&
    // only to avoid compiler warning; alpha is initialized inside the loop
    alpha = 0.0;
 
+   if( b_norm == 0.0 )
+       b_norm = 1.0;
+
    this->resetIterations();
    this->setResidue( tau / b_norm );
 
@@ -114,7 +126,13 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector&
       else {
          // not necessary in odd iter since the previous iteration
          // already computed v_{m+1} = A*u_{m+1}
-         matrix -> vectorProduct( u, Au );
+         if( preconditioner ) {
+            matrix -> vectorProduct( u, M_tmp );
+            preconditioner -> solve( M_tmp, Au );
+         }
+         else {
+            matrix -> vectorProduct( u, Au );
+         }
       }
       w.addVector( Au, -alpha );
       d.addVector( u, 1.0, theta * theta * eta / alpha );
@@ -137,7 +155,13 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector&
 
          u.addVector( w, 1.0, beta );
          v.addVector( Au, beta, beta * beta );
-         matrix -> vectorProduct( u, Au );
+         if( preconditioner ) {
+            matrix -> vectorProduct( u, M_tmp );
+            preconditioner -> solve( M_tmp, Au );
+         }
+         else {
+            matrix -> vectorProduct( u, Au );
+         }
          v.addVector( Au, 1.0 );
       }
       else {
@@ -165,13 +189,17 @@ template< typename Matrix,
           typename Preconditioner >
 bool tnlTFQMRSolver< Matrix, Preconditioner > :: setSize( IndexType size )
 {
+   if( this->size == size )
+      return true;
+   this->size = size;
    if( ! d. setSize( size ) ||
        ! r. setSize( size ) ||
        ! w. setSize( size ) ||
        ! u. setSize( size ) ||
        ! v. setSize( size ) ||
        ! r_ast. setSize( size ) ||
-       ! Au. setSize( size ) )
+       ! Au. setSize( size ) ||
+       ! M_tmp. setSize( size ) )
    {
       cerr << "I am not able to allocate all supporting vectors for the TFQMR solver." << endl;
       return false;
diff --git a/src/solvers/ode/tnlEulerSolver.h b/src/solvers/ode/tnlEulerSolver.h
index 428bf19d550c5cb7b932c234ab1f7b9c7360b38b..13df781c2632042d34750083c4ce99e4958e30de 100644
--- a/src/solvers/ode/tnlEulerSolver.h
+++ b/src/solvers/ode/tnlEulerSolver.h
@@ -22,6 +22,7 @@
 #include <config/tnlConfigDescription.h>
 #include <solvers/ode/tnlExplicitSolver.h>
 #include <config/tnlParameterContainer.h>
+#include <core/tnlTimer.h>
 
 template< typename Problem >
 class tnlEulerSolver : public tnlExplicitSolver< Problem >
@@ -61,7 +62,7 @@ class tnlEulerSolver : public tnlExplicitSolver< Problem >
 
    RealType cflCondition;
    
-   tnlTimerRT timer, updateTimer;
+   //tnlTimer timer, updateTimer;
 };
 
 #include <solvers/ode/tnlEulerSolver_impl.h>
diff --git a/src/solvers/ode/tnlEulerSolver_impl.h b/src/solvers/ode/tnlEulerSolver_impl.h
index b682fcefe96a77eb1de9d5479a30615cdeba68db..b9284d6a609f5833c663f4eb33e0051b01aabc5c 100644
--- a/src/solvers/ode/tnlEulerSolver_impl.h
+++ b/src/solvers/ode/tnlEulerSolver_impl.h
@@ -32,8 +32,8 @@ template< typename Problem >
 tnlEulerSolver< Problem > :: tnlEulerSolver()
 : cflCondition( 0.0 )
 {
-   timer.reset();
-   updateTimer.reset();
+   //timer.reset();
+   //updateTimer.reset();
 };
 
 template< typename Problem >
@@ -80,7 +80,7 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u )
    /****
     * First setup the supporting meshes k1...k5 and k_tmp.
     */
-   timer.start();
+   //timer.start();
    if( ! k1. setLike( u ) )
    {
       cerr << "I do not have enough memory to allocate a supporting grid for the Euler explicit solver." << endl;
@@ -109,9 +109,9 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u )
       /****
        * Compute the RHS
        */
-      timer.stop();
+      //timer.stop();
       this->problem->getExplicitRHS( time, currentTau, u, k1 );
-      timer.start();
+      //timer.start();
 
       RealType lastResidue = this->getResidue();
       RealType maxResidue( 0.0 );
@@ -125,9 +125,9 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u )
          }
       }
       RealType newResidue( 0.0 );
-      updateTimer.start();
+      //updateTimer.start();
       computeNewTimeLevel( u, currentTau, newResidue );
-      updateTimer.stop();
+      //updateTimer.stop();
       this->setResidue( newResidue );
 
       /****
@@ -156,8 +156,8 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u )
           ( this->getConvergenceResidue() != 0.0 && this->getResidue() < this->getConvergenceResidue() ) )
       {
          this->refreshSolverMonitor();
-         std::cerr << std::endl << "RHS Timer = " << timer.getTime() << std::endl;
-         std::cerr << std::endl << "Update Timer = " << updateTimer.getTime() << std::endl;
+         //std::cerr << std::endl << "RHS Timer = " << timer.getRealTime() << std::endl;
+         //std::cerr << std::endl << "Update Timer = " << updateTimer.getRealTime() << std::endl;
          return true;
       }
 
@@ -175,15 +175,15 @@ void tnlEulerSolver< Problem > :: computeNewTimeLevel( DofVectorType& u,
                                                        RealType& currentResidue )
 {
    RealType localResidue = RealType( 0.0 );
-   IndexType size = k1. getSize();
+   const IndexType size = k1. getSize();
    RealType* _u = u. getData();
    RealType* _k1 = k1. getData();
 
    if( std::is_same< DeviceType, tnlHost >::value )
    {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:localResidue) firstprivate( _u, _k1, tau ) if( tnlOmp::isEnabled() )
-#endif
+//#ifdef HAVE_OPENMP
+//#pragma omp parallel for reduction(+:localResidue) firstprivate( _u, _k1, tau ) if( tnlHost::isOMPEnabled() )
+//#endif
       for( IndexType i = 0; i < size; i ++ )
       {
          const RealType add = tau * _k1[ i ];
diff --git a/src/solvers/ode/tnlMersonSolver_impl.h b/src/solvers/ode/tnlMersonSolver_impl.h
index 4c0fdad7e245af80f89f2206e5d5b3776df4cbc8..fee7015670cd1cfcc78f1862fe38e3e305059b59 100644
--- a/src/solvers/ode/tnlMersonSolver_impl.h
+++ b/src/solvers/ode/tnlMersonSolver_impl.h
@@ -274,28 +274,28 @@ void tnlMersonSolver< Problem > :: computeKFunctions( DofVectorType& u,
       this->problem->getExplicitRHS( time, tau, u, k1 );
 
    #ifdef HAVE_OPENMP
-   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, tau, tau_3 ) if( tnlOmp::isEnabled() )
+   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, tau, tau_3 ) if( tnlHost::isOMPEnabled() )
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 1.0 / 3.0 * _k1[ i ] );
       this->problem->getExplicitRHS( time + tau_3, tau, kAux, k2 );
 
    #ifdef HAVE_OPENMP
-   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k2, tau, tau_3 ) if( tnlOmp::isEnabled() )
+   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k2, tau, tau_3 ) if( tnlHost::isOMPEnabled() )
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * 1.0 / 6.0 * ( _k1[ i ] + _k2[ i ] );
       this->problem->getExplicitRHS( time + tau_3, tau, kAux, k3 );
 
    #ifdef HAVE_OPENMP
-   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, tau, tau_3 ) if( tnlOmp::isEnabled() )
+   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, tau, tau_3 ) if( tnlHost::isOMPEnabled() )
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 0.125 * _k1[ i ] + 0.375 * _k3[ i ] );
       this->problem->getExplicitRHS( time + 0.5 * tau, tau, kAux, k4 );
 
    #ifdef HAVE_OPENMP
-   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, _k4, tau, tau_3 ) if( tnlOmp::isEnabled() )
+   #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, _k4, tau, tau_3 ) if( tnlHost::isOMPEnabled() )
    #endif
       for( IndexType i = 0; i < size; i ++ )
          _kAux[ i ] = _u[ i ] + tau * ( 0.5 * _k1[ i ] - 1.5 * _k3[ i ] + 2.0 * _k4[ i ] );
@@ -454,7 +454,7 @@ void tnlMersonSolver< Problem > :: computeNewTimeLevel( DofVectorType& u,
    if( DeviceType :: getDevice() == tnlHostDevice )
    {
 #ifdef HAVE_OPENMP
-#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u, _k1, _k4, _k5, tau ) if( tnlOmp::isEnabled() )
+#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u, _k1, _k4, _k5, tau ) if( tnlHost::isOMPEnabled() )
 #endif
       for( IndexType i = 0; i < size; i ++ )
       {
diff --git a/src/solvers/pde/tnlExplicitTimeStepper.h b/src/solvers/pde/tnlExplicitTimeStepper.h
index 3394733c765b59eb88328f75b49e7a196819d87f..9c251443789c79ad7c453a75640df1d16a36ddab 100644
--- a/src/solvers/pde/tnlExplicitTimeStepper.h
+++ b/src/solvers/pde/tnlExplicitTimeStepper.h
@@ -21,7 +21,7 @@
 #include <solvers/ode/tnlODESolverMonitor.h>
 #include <config/tnlConfigDescription.h>
 #include <config/tnlParameterContainer.h>
-#include <core/tnlTimerRT.h>
+#include <core/tnlTimer.h>
 #include <core/tnlLogger.h>
 
 
@@ -85,7 +85,9 @@ class tnlExplicitTimeStepper
 
    MeshDependentDataType* meshDependentData;
    
-   tnlTimerRT explicitUpdaterTimer, mainTimer;
+   tnlTimer preIterateTimer, explicitUpdaterTimer, mainTimer, postIterateTimer;
+   
+   long long int allIterations;
 };
 
 #include <solvers/pde/tnlExplicitTimeStepper_impl.h>
diff --git a/src/solvers/pde/tnlExplicitTimeStepper_impl.h b/src/solvers/pde/tnlExplicitTimeStepper_impl.h
index f071b3af0acf7b2ba7c543bfda8362743f6ed773..ea1fb5a436a2e6f506892e2c63ad4c38ce5afc61 100644
--- a/src/solvers/pde/tnlExplicitTimeStepper_impl.h
+++ b/src/solvers/pde/tnlExplicitTimeStepper_impl.h
@@ -27,7 +27,8 @@ tnlExplicitTimeStepper< Problem, OdeSolver >::
 tnlExplicitTimeStepper()
 : odeSolver( 0 ),
   problem( 0 ),
-  timeStep( 0 )
+  timeStep( 0 ),
+  allIterations( 0 )
 {
 };
 
@@ -58,6 +59,8 @@ init( const MeshType& mesh )
 {
    this->explicitUpdaterTimer.reset();
    this->mainTimer.reset();
+   this->preIterateTimer.reset();
+   this->postIterateTimer.reset();
    return true;
 }
 
@@ -123,8 +126,12 @@ solve( const RealType& time,
       this->odeSolver->setMaxTau( ( stopTime - time ) / ( typename OdeSolver< Problem >::RealType ) this->odeSolver->getMinIterations() );
    this->mesh = &mesh;
    this->meshDependentData = &meshDependentData;
-   return this->odeSolver->solve( dofVector );
+   if( ! this->odeSolver->solve( dofVector ) )
+      return false;
+   this->problem->setExplicitBoundaryConditions( stopTime, *( this->mesh ), dofVector, *( this->meshDependentData ) );
    mainTimer.stop();
+   this->allIterations += this->odeSolver->getIterations();
+   return true;
 }
 
 template< typename Problem,
@@ -136,6 +143,7 @@ getExplicitRHS( const RealType& time,
                 DofVectorType& u,
                 DofVectorType& fu )
 {
+   this->preIterateTimer.start();
    if( ! this->problem->preIterate( time,
                                     tau,
                                     *( this->mesh),
@@ -146,9 +154,12 @@ getExplicitRHS( const RealType& time,
       return;
       //return false; // TODO: throw exception
    }
+   this->preIterateTimer.stop();
    this->explicitUpdaterTimer.start();
+   this->problem->setExplicitBoundaryConditions( time, *( this->mesh ), u, *( this->meshDependentData ) );
    this->problem->getExplicitRHS( time, tau, *( this->mesh ), u, fu, *( this->meshDependentData ) );
    this->explicitUpdaterTimer.stop();
+   this->postIterateTimer.start();
    if( ! this->problem->postIterate( time,
                                      tau,
                                      *( this->mesh ),
@@ -159,6 +170,7 @@ getExplicitRHS( const RealType& time,
       return;
       //return false; // TODO: throw exception
    }
+   this->postIterateTimer.stop();
 }
 
 template< typename Problem,
@@ -167,8 +179,15 @@ bool
 tnlExplicitTimeStepper< Problem, OdeSolver >::
 writeEpilog( tnlLogger& logger )
 {
-   logger.writeParameter< double >( "Explicit update computation time:", this->explicitUpdaterTimer.getTime() );
-   logger.writeParameter< double >( "Explicit time stepper time:", this->mainTimer.getTime() );
+   logger.writeParameter< long long int >( "Iterations count:", this->allIterations );
+   logger.writeParameter< const char* >( "Pre-iterate time:", "" );
+   this->preIterateTimer.writeLog( logger, 1 );   
+   logger.writeParameter< const char* >( "Explicit update computation:", "" );
+   this->explicitUpdaterTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Explicit time stepper time:", "" );
+   this->mainTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Post-iterate time:", "" );
+   this->postIterateTimer.writeLog( logger, 1 );   
    return true;
 }
 
diff --git a/src/solvers/pde/tnlExplicitUpdater.h b/src/solvers/pde/tnlExplicitUpdater.h
index 6956c8b29e9a200d054677f7ec31e737c78fe66b..597222b741bcd0c8aad030559af109d17a4d563c 100644
--- a/src/solvers/pde/tnlExplicitUpdater.h
+++ b/src/solvers/pde/tnlExplicitUpdater.h
@@ -19,6 +19,7 @@
 #define TNLEXPLICITUPDATER_H_
 
 #include <functions/tnlFunctionAdapter.h>
+#include <core/tnlTimer.h>
 
 template< typename Real,
           typename MeshFunction,
@@ -51,7 +52,8 @@ class tnlExplicitUpdaterTraverserUserData
         rightHandSide( &rightHandSide ),
         u( &u ),
         fu( &fu )
-      {};
+      {
+      };
 };
 
 
@@ -72,6 +74,14 @@ class tnlExplicitUpdater
                                                    DifferentialOperator,
                                                    BoundaryConditions,
                                                    RightHandSide > TraverserUserData;
+      
+      tnlExplicitUpdater()
+      : gpuTransferTimer( 0 ){}
+      
+      void setGPUTransferTimer( tnlTimer& timer )
+      {
+         this->gpuTransferTimer = &timer;
+      }
 
       template< typename EntityType >
       void update( const RealType& time,
@@ -126,6 +136,10 @@ class tnlExplicitUpdater
                      *userData.time );
             }
       };
+      
+   protected:
+      
+      tnlTimer* gpuTransferTimer;
 };
 
 #include <solvers/pde/tnlExplicitUpdater_impl.h>
diff --git a/src/solvers/pde/tnlExplicitUpdater_impl.h b/src/solvers/pde/tnlExplicitUpdater_impl.h
index 16a2c0346402907c21701d280609deddd27a3cad..a2c3ff1ce79b1547716f08b5ca1c69d5f011f5ab 100644
--- a/src/solvers/pde/tnlExplicitUpdater_impl.h
+++ b/src/solvers/pde/tnlExplicitUpdater_impl.h
@@ -23,6 +23,8 @@
 #include <mesh/grids/tnlTraverser_Grid2D.h>
 #include <mesh/grids/tnlTraverser_Grid3D.h>
 
+#include "tnlExplicitUpdater.h"
+
 template< typename Mesh,
           typename MeshFunction,
           typename DifferentialOperator,
@@ -60,12 +62,17 @@ update( const RealType& time,
    }
    if( std::is_same< DeviceType, tnlCuda >::value )
    {
+      if( this->gpuTransferTimer ) 
+         this->gpuTransferTimer->start();
       RealType* kernelTime = tnlCuda::passToDevice( time );
       DifferentialOperator* kernelDifferentialOperator = tnlCuda::passToDevice( differentialOperator );
       BoundaryConditions* kernelBoundaryConditions = tnlCuda::passToDevice( boundaryConditions );
       RightHandSide* kernelRightHandSide = tnlCuda::passToDevice( rightHandSide );
       MeshFunction* kernelU = tnlCuda::passToDevice( u );
       MeshFunction* kernelFu = tnlCuda::passToDevice( fu );
+     if( this->gpuTransferTimer ) 
+         this->gpuTransferTimer->stop();
+
       TraverserUserData userData( *kernelTime, *kernelDifferentialOperator, *kernelBoundaryConditions, *kernelRightHandSide, *kernelU, *kernelFu );
       checkCudaDevice;
       tnlTraverser< MeshType, EntityType > meshTraverser;
@@ -78,7 +85,10 @@ update( const RealType& time,
                                                     ( mesh,
                                                       userData );
 
-      checkCudaDevice;
+      if( this->gpuTransferTimer ) 
+         this->gpuTransferTimer->start();
+      
+      checkCudaDevice;      
       tnlCuda::freeFromDevice( kernelTime );
       tnlCuda::freeFromDevice( kernelDifferentialOperator );
       tnlCuda::freeFromDevice( kernelBoundaryConditions );
@@ -86,6 +96,10 @@ update( const RealType& time,
       tnlCuda::freeFromDevice( kernelU );
       tnlCuda::freeFromDevice( kernelFu );
       checkCudaDevice;
+      
+      if( this->gpuTransferTimer ) 
+         this->gpuTransferTimer->stop();
+
    }
 }
 
diff --git a/src/solvers/pde/tnlLinearSystemAssembler.h b/src/solvers/pde/tnlLinearSystemAssembler.h
index 14753fb7877ae1f1126b541345099a1da28a916f..74b7b6e44056915b0b9fbee59f1cd56d132b74f6 100644
--- a/src/solvers/pde/tnlLinearSystemAssembler.h
+++ b/src/solvers/pde/tnlLinearSystemAssembler.h
@@ -121,14 +121,13 @@ class tnlLinearSystemAssembler
                                     const EntityType& entity )
          {
              ( *userData.b )[ entity.getIndex() ] = 0.0;           
-             userData.boundaryConditions->updateLinearSystem
-               ( *userData.time + *userData.tau,
-                 mesh,
-                 entity.getIndex(),
+             userData.boundaryConditions->setMatrixElements
+               ( *userData.u,
                  entity,
-                 *userData.u,
-                 *userData.b,
-                 *userData.matrix );
+                 *userData.time + *userData.tau,
+                 *userData.tau,
+                 *userData.matrix,
+                 *userData.b );
          }
    };
 
@@ -143,15 +142,13 @@ class tnlLinearSystemAssembler
                                     const EntityType& entity )
          {
             ( *userData.b )[ entity.getIndex() ] = 0.0;            
-            userData.differentialOperator->updateLinearSystem
-               ( *userData.time,
-                 *userData.tau,
-                 mesh,
-                 entity.getIndex(),
+            userData.differentialOperator->setMatrixElements
+               ( *userData.u,
                  entity,
-                 *userData.u,
-                 *userData.b,
-                 *userData.matrix );
+                 *userData.time + *userData.tau,
+                 *userData.tau,
+                 *userData.matrix, 
+                 *userData.b );
             
             typedef tnlFunctionAdapter< MeshType, RightHandSide > RhsFunctionAdapter;
             typedef tnlFunctionAdapter< MeshType, MeshFunction > MeshFunctionAdapter;
diff --git a/src/solvers/pde/tnlPDESolver.h b/src/solvers/pde/tnlPDESolver.h
index 87c1f18e5b36a0814455703c9d689a54004f043b..03df9d6e00b961f5e424a5d8b3a633da8db781cc 100644
--- a/src/solvers/pde/tnlPDESolver.h
+++ b/src/solvers/pde/tnlPDESolver.h
@@ -21,7 +21,6 @@
 #include <core/tnlObject.h>
 #include <config/tnlConfigDescription.h>
 #include <config/tnlParameterContainer.h>
-#include <solvers/tnlSolverMonitor.h>
 #include <core/tnlLogger.h>
 
 template< typename Problem,
@@ -73,13 +72,9 @@ class tnlPDESolver : public tnlObject
 
       const RealType& getSnapshotPeriod() const;
 
-      void setIoRtTimer( tnlTimerRT& ioRtTimer);
+      void setIoTimer( tnlTimer& ioTimer);
 
-      void setComputeRtTimer( tnlTimerRT& computeRtTimer );
-
-      void setIoCpuTimer( tnlTimerCPU& ioCpuTimer );
-
-      void setComputeCpuTimer( tnlTimerCPU& computeCpuTimer );
+      void setComputeTimer( tnlTimer& computeTimer );
 
       bool solve();
 
@@ -99,10 +94,7 @@ class tnlPDESolver : public tnlObject
 
       ProblemType* problem;
 
-      tnlTimerRT *ioRtTimer, *computeRtTimer;
-
-      tnlTimerCPU *ioCpuTimer, *computeCpuTimer;
-
+      tnlTimer *ioTimer, *computeTimer;
 };
 
 #include <solvers/pde/tnlPDESolver_impl.h>
diff --git a/src/solvers/pde/tnlPDESolver_impl.h b/src/solvers/pde/tnlPDESolver_impl.h
index f7df40fc84992153b341a38e9f2aad515b2c0484..b0356073984a277c1af4930c41dff14927e3ed27 100644
--- a/src/solvers/pde/tnlPDESolver_impl.h
+++ b/src/solvers/pde/tnlPDESolver_impl.h
@@ -18,6 +18,9 @@
 #ifndef TNLPDESOLVER_IMPL_H_
 #define TNLPDESOLVER_IMPL_H_
 
+#include "tnlPDESolver.h"
+
+
 template< typename Problem,
           typename TimeStepper >
 tnlPDESolver< Problem, TimeStepper >::
@@ -29,10 +32,8 @@ tnlPDESolver()
   timeStep( 1.0 ),
   timeStepOrder( 0.0 ),
   problem( 0 ),
-  ioRtTimer( 0 ),
-  computeRtTimer( 0 ),
-  ioCpuTimer( 0 ),
-  computeCpuTimer( 0 )
+  ioTimer( 0 ),
+  computeTimer( 0 )
 {
 }
 
@@ -288,27 +289,15 @@ getTimeStepOrder() const
 }
 
 template< typename Problem, typename TimeStepper >
-void tnlPDESolver< Problem, TimeStepper > :: setIoRtTimer( tnlTimerRT& ioRtTimer )
-{
-   this->ioRtTimer = &ioRtTimer;
-}
-
-template< typename Problem, typename TimeStepper >
-void tnlPDESolver< Problem, TimeStepper > :: setComputeRtTimer( tnlTimerRT& computeRtTimer )
-{
-   this->computeRtTimer = &computeRtTimer;
-}
-
-template< typename Problem, typename TimeStepper >
-void tnlPDESolver< Problem, TimeStepper > :: setIoCpuTimer( tnlTimerCPU& ioCpuTimer )
+void tnlPDESolver< Problem, TimeStepper > :: setIoTimer( tnlTimer& ioTimer )
 {
-   this->ioCpuTimer = &ioCpuTimer;
+   this->ioTimer = &ioTimer;
 }
 
 template< typename Problem, typename TimeStepper >
-void tnlPDESolver< Problem, TimeStepper > :: setComputeCpuTimer( tnlTimerCPU& computeCpuTimer )
+void tnlPDESolver< Problem, TimeStepper > :: setComputeTimer( tnlTimer& computeTimer )
 {
-   this->computeCpuTimer = & computeCpuTimer;
+   this->computeTimer = &computeTimer;
 }
 
 template< typename Problem, typename TimeStepper >
@@ -330,23 +319,17 @@ solve()
    IndexType step( 0 );
    IndexType allSteps = ceil( ( this->finalTime - this->initialTime ) / this->snapshotPeriod );
 
-   this->ioRtTimer->reset();
-   this->ioCpuTimer->reset();
-   this->computeRtTimer->reset();
-   this->computeCpuTimer->reset();
+   this->ioTimer->reset();
+   this->computeTimer->reset();
    
-   this->ioRtTimer->start();
-   this->ioCpuTimer->start();
+   this->ioTimer->start();
    if( ! this->problem->makeSnapshot( t, step, mesh, this->dofs, this->meshDependentData ) )
    {
       cerr << "Making the snapshot failed." << endl;
       return false;
-   }
-
-   this->ioRtTimer->stop();
-   this->ioCpuTimer->stop();
-   this->computeRtTimer->start();
-   this->computeCpuTimer->start();
+   }   
+   this->ioTimer->stop();
+   this->computeTimer->start();
 
    /****
     * Initialize the time stepper
@@ -363,22 +346,17 @@ solve()
       step ++;
       t += tau;
 
-      this->ioRtTimer->start();
-      this->ioCpuTimer->start();
-      this->computeRtTimer->stop();
-      this->computeCpuTimer->stop();
-
+      this->ioTimer->start();
+      this->computeTimer->stop();
       if( ! this->problem->makeSnapshot( t, step, mesh, this->dofs, this->meshDependentData ) )
       {
          cerr << "Making the snapshot failed." << endl;
          return false;
       }
-
-      this->ioRtTimer->stop();
-      this->ioCpuTimer->stop();
-      this->computeRtTimer->start();
-      this->computeCpuTimer->start();
+      this->ioTimer->stop();
+      this->computeTimer->start();
    }
+   this->computeTimer->stop();
    return true;
 }
 
@@ -387,7 +365,8 @@ bool
 tnlPDESolver< Problem, TimeStepper >::
 writeEpilog( tnlLogger& logger ) const
 {
-   return this->timeStepper->writeEpilog( logger );
+   return ( this->timeStepper->writeEpilog( logger ) &&
+      this->problem->writeEpilog( logger ) );
 }
 
 #endif /* TNLPDESOLVER_IMPL_H_ */
diff --git a/src/solvers/pde/tnlSemiImplicitTimeStepper.h b/src/solvers/pde/tnlSemiImplicitTimeStepper.h
index 05f16704e760f44ab54d433211a56ddaa5a29623..5ce8dbf48b38f2db3d5992510a777d3072fe6235 100644
--- a/src/solvers/pde/tnlSemiImplicitTimeStepper.h
+++ b/src/solvers/pde/tnlSemiImplicitTimeStepper.h
@@ -80,9 +80,11 @@ class tnlSemiImplicitTimeStepper
 
    RealType timeStep;
 
-   tnlTimerRT preIterateTimer, linearSystemAssemblerTimer, linearSystemSolverTimer, postIterateTimer;
+   tnlTimer preIterateTimer, linearSystemAssemblerTimer, linearSystemSolverTimer, postIterateTimer;
    
    bool verbose;
+   
+   long long int allIterations;
 };
 
 #include <solvers/pde/tnlSemiImplicitTimeStepper_impl.h>
diff --git a/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h b/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h
index 4a873b67d3ae7f5eea6aceb7571e009b17de3fc7..34dede52de49d918cc624efef53cceb9245048d6 100644
--- a/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h
+++ b/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h
@@ -20,13 +20,16 @@
 
 #include <core/mfuncs.h>
 
+#include "tnlSemiImplicitTimeStepper.h"
+
 template< typename Problem,
           typename LinearSystemSolver >
 tnlSemiImplicitTimeStepper< Problem, LinearSystemSolver >::
 tnlSemiImplicitTimeStepper()
 : problem( 0 ),
   linearSystemSolver( 0 ),
-  timeStep( 0 )
+  timeStep( 0 ),
+  allIterations( 0 )
 {
 };
 
@@ -75,6 +78,7 @@ init( const MeshType& mesh )
       return false;
    this->linearSystemAssemblerTimer.reset();
    this->linearSystemSolverTimer.reset();
+   this->allIterations = 0;
    return true;
 }
 
@@ -187,6 +191,7 @@ solve( const RealType& time,
          return false;
       }
       this->linearSystemSolverTimer.stop();
+      this->allIterations += this->linearSystemSolver->getIterations();
 
       //if( verbose )
       //   cout << endl;
@@ -214,10 +219,15 @@ bool
 tnlSemiImplicitTimeStepper< Problem, LinearSystemSolver >::
 writeEpilog( tnlLogger& logger )
 {
-   logger.writeParameter< double >( "Pre-iterate time:", this->preIterateTimer.getTime() );
-   logger.writeParameter< double >( "Linear system assembler time:", this->linearSystemAssemblerTimer.getTime() );
-   logger.writeParameter< double >( "Linear system solver time:", this->linearSystemSolverTimer.getTime() );
-   logger.writeParameter< double >( "Post-iterate time:", this->postIterateTimer.getTime() );
+   logger.writeParameter< long long int >( "Iterations count:", this->allIterations );
+   logger.writeParameter< const char* >( "Pre-iterate time:", "" );
+   this->preIterateTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Linear system assembler time:", "" );
+   this->linearSystemAssemblerTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Linear system solver time:", "" );
+   this->linearSystemSolverTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Post-iterate time:", "" );
+   this->postIterateTimer.writeLog( logger, 1 );
    return true;
 }
 
diff --git a/src/solvers/tnlIterativeSolver_impl.h b/src/solvers/tnlIterativeSolver_impl.h
index 5841ec5b3cda1417c9e341032a26697ee0c10140..281948456788f74507f9e475654d961f7be242a0 100644
--- a/src/solvers/tnlIterativeSolver_impl.h
+++ b/src/solvers/tnlIterativeSolver_impl.h
@@ -38,7 +38,7 @@ template< typename Real, typename Index >
 void tnlIterativeSolver< Real, Index> :: configSetup( tnlConfigDescription& config,
                                                       const tnlString& prefix )
 {
-   config.addEntry< int >   ( prefix + "max-iterations", "Maximal number of iterations the solver may perform.", 100000 );
+   config.addEntry< int >   ( prefix + "max-iterations", "Maximal number of iterations the solver may perform.", 100000000000 );
    config.addEntry< int >   ( prefix + "min-iterations", "Minimal number of iterations the solver must perform.", 0 );
    config.addEntry< double >( prefix + "convergence-residue", "Convergence occurs when the residue drops bellow this limit.", 1.0e-6 );
    config.addEntry< double >( prefix + "divergence-residue", "Divergence occurs when the residue exceeds given limit.", DBL_MAX );
diff --git a/src/solvers/tnlSolverConfig_impl.h b/src/solvers/tnlSolverConfig_impl.h
index 01d005335aac5b531af61f05bb0768317688d382..fcf4221c421827eb730f17ffaa71f547e17ac7f9 100644
--- a/src/solvers/tnlSolverConfig_impl.h
+++ b/src/solvers/tnlSolverConfig_impl.h
@@ -156,7 +156,7 @@ bool tnlSolverConfig< ConfigTag, ProblemConfig >::configSetup( tnlConfigDescript
 
    config.addDelimiter( " === Logs and messages ===" );
    config.addEntry< int >( "verbose", "Set the verbose mode. The higher number the more messages are generated.", 1 );
-   config.addEntry< tnlString >( "log-file", "Log file for the computation." );
+   config.addEntry< tnlString >( "log-file", "Log file for the computation.", "log.txt" );
    config.addEntry< int >( "log-width", "Number of columns of the log table.", 80 );
    return true;
 
diff --git a/src/solvers/tnlSolverStarter.h b/src/solvers/tnlSolverStarter.h
index 7b9bc2d8f3757487ac9f479ac274ab18ddef4ddd..87c67ac8d72ea3c6fd7575b4c0e63375558001be 100644
--- a/src/solvers/tnlSolverStarter.h
+++ b/src/solvers/tnlSolverStarter.h
@@ -19,8 +19,7 @@
 #define TNLSOLVERSTARTER_H_
 
 #include <config/tnlParameterContainer.h>
-#include <core/tnlTimerRT.h>
-#include <core/tnlTimerCPU.h>
+#include <core/tnlTimer.h>
 #include <ostream>
 
 template< typename MeshConfig >
@@ -45,9 +44,7 @@ class tnlSolverStarter
 
    int logWidth;
 
-   tnlTimerRT ioRtTimer, computeRtTimer, totalRtTimer;
-
-   tnlTimerCPU ioCpuTimer, computeCpuTimer, totalCpuTimer;
+   tnlTimer ioTimer, computeTimer, totalTimer;
 };
 
 #include <solvers/tnlSolverStarter_impl.h>
diff --git a/src/solvers/tnlSolverStarter_impl.h b/src/solvers/tnlSolverStarter_impl.h
index 3f9e5fcc0201adb7d994497a48a9868897ac2f94..2126dec4cbc478522f809eca8530566ba11cccfd 100644
--- a/src/solvers/tnlSolverStarter_impl.h
+++ b/src/solvers/tnlSolverStarter_impl.h
@@ -21,7 +21,6 @@
 #include <tnlConfig.h>
 #include <core/tnlLogger.h>
 #include <core/tnlString.h>
-#include <core/tnlOmp.h>
 #include <core/tnlCuda.h>
 #include <solvers/ode/tnlMersonSolver.h>
 #include <solvers/ode/tnlEulerSolver.h>
@@ -92,7 +91,7 @@ bool tnlSolverStarter< ConfigTag > :: run( const tnlParameterContainer& paramete
    /****
     * Create and set-up the problem
     */
-   if( ! tnlOmp::setup( parameters ) ||
+   if( ! tnlHost::setup( parameters ) ||
        ! tnlCuda::setup( parameters ) )
       return false;
    Problem problem;
@@ -352,11 +351,14 @@ class tnlSolverStarterExplicitTimeStepperSetter
          typedef typename Problem::IndexType IndexType;
          typedef tnlODESolverMonitor< RealType, IndexType > SolverMonitorType;
 
+         const int verbose = parameters.getParameter< int >( "verbose" );
+
          ExplicitSolver explicitSolver;
          explicitSolver.setup( parameters );
-         int verbose = parameters.getParameter< int >( "verbose" );
          explicitSolver.setVerbose( verbose );
+
          SolverMonitorType odeSolverMonitor;
+         odeSolverMonitor.setVerbose( verbose );
          if( ! problem.getSolverMonitor() )
             explicitSolver.setSolverMonitor( odeSolverMonitor );
          else
@@ -395,10 +397,13 @@ class tnlSolverStarterSemiImplicitTimeStepperSetter
          typedef typename Problem::IndexType IndexType;
          typedef tnlIterativeSolverMonitor< RealType, IndexType > SolverMonitorType;
 
+         const int verbose = parameters.getParameter< int >( "verbose" );
+
          LinearSystemSolverType linearSystemSolver;
          linearSystemSolver.setup( parameters );
 
          SolverMonitorType solverMonitor;
+         solverMonitor.setVerbose( verbose );
          if( ! problem.getSolverMonitor() )
             linearSystemSolver.setSolverMonitor( solverMonitor );
          else
@@ -490,10 +495,8 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
                                                     const tnlParameterContainer& parameters,
                                                     TimeStepper& timeStepper )
 {
-   this->totalCpuTimer.reset();
-   this->totalRtTimer.reset();
-   this->totalCpuTimer.start();
-   this->totalRtTimer.start();
+   this->totalTimer.reset();
+   this->totalTimer.start();
    
 
    /****
@@ -537,16 +540,10 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
    /****
     * Set-up timers
     */
-   this->computeRtTimer.reset();
-   this->computeCpuTimer.reset();
-   this->ioRtTimer.reset();
-   this->ioRtTimer.stop();
-   this->ioCpuTimer.reset();
-   this->ioCpuTimer.stop();
-   solver.setComputeRtTimer( this->computeRtTimer );
-   solver.setComputeCpuTimer( this->computeCpuTimer );
-   solver.setIoRtTimer( this->ioRtTimer );
-   solver.setIoCpuTimer( this->ioCpuTimer );
+   this->computeTimer.reset();
+   this->ioTimer.reset();
+   solver.setComputeTimer( this->computeTimer );
+   solver.setIoTimer( this->ioTimer );
 
    /****
     * Start the solver
@@ -574,10 +571,8 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
    /****
     * Stop timers
     */
-   this->computeRtTimer.stop();
-   this->computeCpuTimer.stop();
-   this->totalCpuTimer.stop();
-   this->totalRtTimer.stop();
+   this->computeTimer.stop();   
+   this->totalTimer.stop();
 
    /****
     * Write an epilog
@@ -610,15 +605,15 @@ bool tnlSolverStarter< ConfigTag > :: writeEpilog( ostream& str, const Solver& s
    logger.writeSeparator();
    logger.writeCurrentTime( "Finished at:" );
    if( ! solver.writeEpilog( logger ) )
-      return false;
-   logger.writeParameter< double >( "IO Real Time:", this->ioRtTimer.getTime() );
-   logger.writeParameter< double >( "IO CPU Time:", this->ioCpuTimer.getTime() );
-   logger.writeParameter< double >( "Compute Real Time:", this->computeRtTimer.getTime() );
-   logger.writeParameter< double >( "Compute CPU Time:", this->computeCpuTimer.getTime() );
-   logger.writeParameter< double >( "Total Real Time:", this->totalRtTimer.getTime() );
-   logger.writeParameter< double >( "Total CPU Time:", this->totalCpuTimer.getTime() );
+      return false;   
+   logger.writeParameter< const char* >( "Compute time:", "" );
+   this->computeTimer.writeLog( logger, 1 );   
+   logger.writeParameter< const char* >( "I/O time:", "" );
+   this->ioTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Total time:", "" );
+   this->totalTimer.writeLog( logger, 1 );   
    char buf[ 256 ];
-   sprintf( buf, "%f %%", 100 * ( ( double ) this->totalCpuTimer.getTime() ) / this->totalRtTimer.getTime() );
+   sprintf( buf, "%f %%", 100 * ( ( double ) this->totalTimer.getCPUTime() ) / this->totalTimer.getRealTime() );
    logger.writeParameter< char* >( "CPU usage:", buf );
    logger.writeSeparator();
    return true;
diff --git a/src/solvers/tnlSolver_impl.h b/src/solvers/tnlSolver_impl.h
index 4c89c6cc94952c94dd305cee20d56c4ebd628de7..58c1bce84ffbe1d8b7dc9dec946741c0221453af 100644
--- a/src/solvers/tnlSolver_impl.h
+++ b/src/solvers/tnlSolver_impl.h
@@ -21,7 +21,6 @@
 #include <solvers/tnlSolverInitiator.h>
 #include <solvers/tnlSolverStarter.h>
 #include <solvers/tnlSolverConfig.h>
-#include <core/tnlOmp.h>
 #include <core/tnlCuda.h>
 
 template< template< typename Real, typename Device, typename Index, typename MeshType, typename MeshConfig, typename SolverStarter > class ProblemSetter,
@@ -36,7 +35,7 @@ run( int argc, char* argv[] )
    ProblemConfig< MeshConfig >::configSetup( configDescription );
    tnlSolverConfig< MeshConfig, ProblemConfig< MeshConfig> >::configSetup( configDescription );
    configDescription.addDelimiter( "Parallelization setup:" );
-   tnlOmp::configSetup( configDescription );
+   tnlHost::configSetup( configDescription );
    tnlCuda::configSetup( configDescription );
 
    if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h
index ed76f3e8b4d048fd5327392cf54a6dd8b9bc72a0..76269b7139f3c58d82721652f911297803a3ba26 100644
--- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h
+++ b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h
@@ -35,15 +35,15 @@ class BenchmarkLaplace< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index >
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const;
 
-      __cuda_callable__
       template< typename MeshEntity >
+      __cuda_callable__
       Index getLinearSystemRowLength( const MeshType& mesh,
                                       const IndexType& index,
                                       const MeshEntity& entity ) const;
 
       template< typename MeshEntity, typename Vector, typename MatrixRow >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
@@ -77,15 +77,15 @@ class BenchmarkLaplace< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const;
 
-      __cuda_callable__
       template< typename MeshEntity >
+      __cuda_callable__
       Index getLinearSystemRowLength( const MeshType& mesh,
                                       const IndexType& index,
                                       const MeshEntity& entity ) const;
 
       template< typename MeshEntity, typename Vector, typename MatrixRow >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
@@ -119,15 +119,15 @@ class BenchmarkLaplace< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const;
 
-      __cuda_callable__
       template< typename MeshEntity >
+      __cuda_callable__
       Index getLinearSystemRowLength( const MeshType& mesh,
                                       const IndexType& index,
                                       const MeshEntity& entity ) const;
 
       template< typename MeshEntity, typename Vector, typename MatrixRow >
       __cuda_callable__
-      void updateLinearSystem( const RealType& time,
+      void setMatrixElements( const RealType& time,
                                const RealType& tau,
                                const MeshType& mesh,
                                const IndexType& index,
diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
index 862eddc7878509a6dfde868bd31aacc0698db38f..6cec357de6f5a0a5cd5c6ae93668be721d48c559 100644
--- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
+++ b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
@@ -80,7 +80,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 BenchmarkLaplace< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -188,7 +188,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 BenchmarkLaplace< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
@@ -305,7 +305,7 @@ template< typename MeshReal,
 __cuda_callable__
 void
 BenchmarkLaplace< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-updateLinearSystem( const RealType& time,
+setMatrixElements( const RealType& time,
                     const RealType& tau,
                     const MeshType& mesh,
                     const IndexType& index,
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
index b1300dfeca4d6d3390c485e92315f68e45672729..ba9ce459a1855d616f91fe4937a6d230662d7374 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
+++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
@@ -23,7 +23,7 @@ template< typename Mesh, typename Real >class HeatEquationBenchmarkRhs
          typedef typename MeshEntity::MeshType::VertexType VertexType;
          VertexType v = entity.getCenter();
          return 0.0;
-      };
+      }
 };
 
 #endif /* HeatEquationBenchmarkRHS_H_ */
diff --git a/tests/benchmarks/tnl-benchmark-simple-heat-equation.h b/tests/benchmarks/tnl-benchmark-simple-heat-equation.h
index 2d20cddce6144d668a933c471f8893d751e2394d..b30210815a3303fcc0206510c37a6cecdedf94af 100644
--- a/tests/benchmarks/tnl-benchmark-simple-heat-equation.h
+++ b/tests/benchmarks/tnl-benchmark-simple-heat-equation.h
@@ -22,6 +22,7 @@
 #include <stdio.h>
 #include <config/tnlConfigDescription.h>
 #include <config/tnlParameterContainer.h>
+#include <core/tnlTimer.h>
 #include <core/tnlTimerRT.h>
 #include <core/tnlCuda.h>
 
@@ -447,8 +448,8 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters )
    /****
     * Initiation
     */   
-   Real* u = new Real[ gridXSize * gridYSize ];
-   Real* aux = new Real[ gridXSize * gridYSize ];
+   Real* __restrict__ u = new Real[ gridXSize * gridYSize ];
+   Real* __restrict__ aux = new Real[ gridXSize * gridYSize ];
    if( ! u || ! aux )
    {
       cerr << "I am not able to allocate grid function for grid size " << gridXSize << "x" << gridYSize << "." << endl;
@@ -484,9 +485,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters )
     */
    if( verbose )
       cout << "Starting the solver main loop..." << endl;
-   tnlTimerRT timer, computationTimer;
-   computationTimer.reset();
-   timer.reset();
+   tnlTimer timer, computationTimer, updateTimer;
    timer.start();
    Real time( 0.0 );   
    Index iteration( 0 );
@@ -531,6 +530,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters )
          }
       computationTimer.stop();
       
+      updateTimer.start();
       Real absMax( 0.0 ), residue( 0.0 );
       for( Index i = 0; i < dofsCount; i++ )
       {
@@ -540,7 +540,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters )
          /*const Real a = fabs( aux[ i ] );         
          absMax = a > absMax ? a : absMax;*/
       }
-
+      updateTimer.stop();
       
       time += currentTau;
       iteration++;
@@ -550,8 +550,15 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters )
    timer.stop();
    if( verbose )      
       cout << endl << "Finished..." << endl;
-   cout << "Explicit update computation time is " << computationTimer.getTime() << " sec. i.e. " << computationTimer.getTime() / ( double ) iteration << "sec. per iteration." << endl;
-   cout << "Explicit time stepper time is " << timer.getTime() << " sec. i.e. " << timer.getTime() / ( double ) iteration << "sec. per iteration." << endl;
+   tnlLogger logger( 72, std::cout );
+   logger.writeSeparator();
+   logger.writeParameter< const char* >( "Compute time:", "" );
+   timer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Explicit update computation:", "" );
+   computationTimer.writeLog( logger, 1 );
+   logger.writeParameter< const char* >( "Euler solver update:", "" );
+   updateTimer.writeLog( logger, 1 );
+   logger.writeSeparator();
    
    /***
     * Freeing allocated memory
diff --git a/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h b/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h
index 827e3c4bbc1c8e5edeb8fb9c5a1029c7700bceca..6b53f3d910a6a962d501fff696d3e3abda7b3bd6 100644
--- a/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h
+++ b/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h
@@ -65,6 +65,8 @@ class tnlOneSidedMeanCurvatureTest
          this->setupMesh( meshSize );
          ApproximateOperator approximateOperator( this->mesh );
          ExactOperatorType exactOperator;
+         approximateOperator.setRegularizationEpsilon( 1.0 );
+         exactOperator.setRegularizationEpsilon( 1.0 );         
          this->performTest( approximateOperator,
                             exactOperator,
                             errors,
diff --git a/tools/tnl-compile.in b/tools/tnl-compile.in
index b5bd16e7ae1c88cc5138239211b73170e523a2eb..3ded5560392e047176921d69b35d35a93909fcf8 100644
--- a/tools/tnl-compile.in
+++ b/tools/tnl-compile.in
@@ -7,7 +7,7 @@ CXX_STD_FLAGS="-std=c++11"
 for option in "$@"
 do
     case $option in
-        --cuda                  ) CUDA_FLAGS="-DHAVE_CUDA `tnl-cuda-arch`" ;;
+        --cuda                  ) CUDA_FLAGS="-DHAVE_CUDA --std c++11 `tnl-cuda-arch`" ;;
         --debug                 ) DEBUG_FLAGS="-g -O0"
     esac
 done