diff --git a/src/TNL/Devices/Cuda.cpp b/src/TNL/Devices/Cuda.cpp
index ef868455784575e3304ac4ad2416e6b856f38ce5..11301ef8687534f30202526bdfd59a06d142f530 100644
--- a/src/TNL/Devices/Cuda.cpp
+++ b/src/TNL/Devices/Cuda.cpp
@@ -19,6 +19,7 @@ namespace TNL {
 namespace Devices {
 
 SmartPointersRegister Cuda::smartPointersRegister;
+Timer Cuda::smartPointersSynchronizationTimer;
 
 String Cuda::getDeviceType()
 {
@@ -67,6 +68,8 @@ bool Cuda::setup( const Config::ParameterContainer& parameters,
       std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
       return false;
    }
+   smartPointersSynchronizationTimer.reset();
+   smartPointersSynchronizationTimer.stop();
 #endif
    return true;
 }
@@ -85,7 +88,10 @@ bool Cuda::synchronizeDevice( int deviceId )
 {
    if( deviceId < 0 )
       deviceId = Devices::CudaDeviceInfo::getActiveDevice();
-   return smartPointersRegister.synchronizeDevice( deviceId );
+   smartPointersSynchronizationTimer.start();
+   bool b = smartPointersRegister.synchronizeDevice( deviceId );
+   smartPointersSynchronizationTimer.stop();
+   return b;
 }
 
 } // namespace Devices
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index 301b0091634e412522a52539563b2a8c0e6b122c..dc3923f4af70ebbf4161b8542d08b2308b5fcf4f 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -15,6 +15,7 @@
 #include <TNL/String.h>
 #include <TNL/Assert.h>
 #include <TNL/SmartPointersRegister.h>
+#include <TNL/Timer.h>
 
 namespace TNL {
 
@@ -31,7 +32,6 @@ namespace Devices {
 #define __cuda_callable__
 #endif
 
-
 class Cuda
 {
    public:
@@ -108,10 +108,13 @@ class Cuda
    // called to get the device ID.
    static bool synchronizeDevice( int deviceId = -1 );
    
+   static Timer smartPointersSynchronizationTimer;
+   
    protected:
    
    static SmartPointersRegister smartPointersRegister;
-
+   
+   
 };
 
 #ifdef HAVE_CUDA
diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
index c20e8fa39ffe917ab69511a63894b50f13aac744..84df79b9fb672432c71fad9b67817118c52546c7 100644
--- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
+++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
@@ -156,6 +156,8 @@ operator()( const PreimageFunction& u,
    const typename EntityType::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
+   const IndexType c = entity.getIndex();
+   const IndexType xSize = entity.getMesh().getDimensions().x();
    return ( u[ neighbourEntities.template getEntityIndex< -1,  0 >() ]
           + u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] ) * hxSquareInverse +
           ( u[ neighbourEntities.template getEntityIndex<  0, -1 >() ]
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index dcb8e69a3c481bf389b7332f5276eadded6b4e88..6dc77992669e167ca200a820a87bf32be79d4178 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -69,8 +69,6 @@ bool
 HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
 writeEpilog( Logger& logger )
 {
-   logger.writeParameter< const char* >( "GPU transfer time:", "" );
-   this->gpuTransferTimer.writeLog( logger, 1 );
    return true;
 }
 
@@ -239,9 +237,9 @@ getExplicitRHS( const RealType& time,
       time + tau,
       this->uPointer );*/
    
-   //uPointer->write( "u.txt", "gnuplot" );
-   //fuPointer->write( "fu.txt", "gnuplot" );
-   //getchar();
+   /*uPointer->write( "u.txt", "gnuplot" );
+   fuPointer->write( "fu.txt", "gnuplot" );
+   getchar();*/
 }
 
 template< typename Mesh,
diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h
index afef48a6bfdbd3f6cd5e439765d22d2a6738d91c..2d988c8b4120920ca346dd7e8e205b43f64660f9 100644
--- a/src/TNL/Solvers/SolverStarter_impl.h
+++ b/src/TNL/Solvers/SolverStarter_impl.h
@@ -478,6 +478,11 @@ bool SolverStarter< ConfigTag > :: writeEpilog( std::ostream& str, const Solver&
       return false;
    logger.writeParameter< const char* >( "Compute time:", "" );
    this->computeTimer.writeLog( logger, 1 );
+   if( std::is_same< typename Solver::DeviceType, TNL::Devices::Cuda >::value )
+   {
+      logger.writeParameter< const char* >( "GPU synchronization time:", "" );
+      TNL::Devices::Cuda::smartPointersSynchronizationTimer.writeLog( logger, 1 );
+   }   
    logger.writeParameter< const char* >( "I/O time:", "" );
    this->ioTimer.writeLog( logger, 1 );
    logger.writeParameter< const char* >( "Total time:", "" );
diff --git a/src/TNL/Timer.cpp b/src/TNL/Timer.cpp
index 06a50358017c631b9710d5e88b1fd093e43fc4a3..dc561202e7f6f7b759cc599edba7e37d2a1b7362 100644
--- a/src/TNL/Timer.cpp
+++ b/src/TNL/Timer.cpp
@@ -9,6 +9,7 @@
 /* See Copyright Notice in tnl/Copyright */
 
 #include <TNL/Timer.h>
+#include <TNL/Logger.h>
 
 #include <TNL/tnlConfig.h>
 #ifdef HAVE_SYS_RESOURCE_H
diff --git a/src/TNL/Timer.h b/src/TNL/Timer.h
index 58e35e03d1636178ac6907ade662b8217447639a..5019ab46f3bf2ff0dcef2bed65d96ac9af848457 100644
--- a/src/TNL/Timer.h
+++ b/src/TNL/Timer.h
@@ -11,10 +11,10 @@
 
 #pragma once
 
-#include <TNL/Logger.h>
-
 namespace TNL {
 
+class Logger;
+
 class Timer
 {
    public:
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
index e9e3b5af463addbc9b1411ce779f8616a64c0905..d074b13bc504c16e62a03df39ed7eedd8b997f83 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
+++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
@@ -354,15 +354,15 @@ heatEquationTemplatedCompact( const GridType* grid,
    {
       GridEntity entity( *grid, coordinates, entityOrientation, entityBasis );
       
-      //entity.refresh();
-      /*if( ! entity.isBoundaryEntity() )
+      entity.refresh();
+      if( ! entity.isBoundaryEntity() )
       {
          fu( entity ) = 
             ( *differentialOperator )( u, entity, time );
 
          typedef Functions::FunctionAdapter< GridType, RightHandSide > FunctionAdapter;
          fu( entity ) +=  FunctionAdapter::getValue( *rightHandSide, entity, time );
-      }*/
+      }
    }
 }
 #endif
diff --git a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h b/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h
index 7f2dcfe6bd007bdafbcb634747180b91df3a73d8..3097d652f0a9928d791854c1eacb2728790dba6e 100644
--- a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h
+++ b/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h
@@ -52,12 +52,12 @@ __global__ void boundaryConditionsKernel( Real* u,
       aux[ j * gridXSize + gridYSize - 1 ] = 0.0;
       u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
    }
-   if( j == 0 && i > 0 && i < gridXSize - 1 )
+   if( j == 0 && i < gridXSize )
    {
       aux[ i ] = 0.0; //u[ j * gridXSize + 1 ];
       u[ i ] = 0.0; //u[ j * gridXSize + 1 ];
    }
-   if( j == gridYSize -1  && i > 0 && i < gridXSize - 1 )
+   if( j == gridYSize -1  && i < gridXSize )
    {
       aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
       u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];      
@@ -80,11 +80,11 @@ __global__ void heatEquationKernel( const Real* u,
        j > 0 && j < gridYSize - 1 )
    {
       const Index c = j * gridXSize + i;
-      aux[ c ] = tau * ( ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv +
-                         ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] * hy_inv ) );      
-      //aux[ c ] = tau * ( ( __ldg( &u[ c - 1 ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + 1 ] ) ) * hx_inv +
-      //                 ( __ldg( &u[ c - gridXSize ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + gridXSize ] ) ) * hy_inv );
-   }
+      aux[ c ] = ( ( u[ c - 1 ]         - 2.0 * u[ c ] + u[ c + 1 ]         ) * hx_inv +
+                   ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv );
+      //aux[ c ] = ( ( __ldg( &u[ c - 1 ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + 1 ] ) ) * hx_inv +
+      //                   ( __ldg( &u[ c - gridXSize ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + gridXSize ] ) ) * hy_inv );
+   }  
 }
 
 template< typename RealType >
diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h b/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h
index 6834ed731da5afeec97f485b4723d176fe741527..f040068f3a9da2c817002ed2572eaffa0560e34b 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h
@@ -190,13 +190,14 @@ template< typename Real, typename Index >
 __global__ void updateKernel( Real* u,
                               Real* aux,
                               Real* cudaBlockResidue,
-                              const Index dofs )
+                              const Index dofs,
+                              Real tau )
 {
    const Index blockOffset = blockIdx.x * blockDim.x;
    Index idx = blockOffset + threadIdx.x;
  
    if( idx < dofs )
-      u[ idx ] += aux[ idx ];
+      u[ idx ] += tau * aux[ idx ];
  
    __syncthreads();
 
@@ -346,29 +347,31 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters,
       const Real timeLeft = finalTime - time;
       const Real currentTau = tau < timeLeft ? tau : timeLeft;    
       
-      if( ! pureCRhsCuda( cudaGridSize, cudaBlockSize, cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize) )
+      if( ! pureCRhsCuda( cudaGridSize, cudaBlockSize, cuda_u, cuda_aux, currentTau, hx_inv, hy_inv, gridXSize, gridYSize) )
          return false;
       computationTimer.stop();
       
-      /*cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
-      writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
-      getchar();*/
-        
+      /*if( iteration % 100 == 0 )
+      {
+         cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
+         writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
+
+         cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
+         writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
+         getchar();
+      }*/      
+      
       updateTimer.start();
       /****
        * Update
        */
       //cout << "Update ... " << std::endl;
-      updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount );
+      updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount, tau );
       if( cudaGetLastError() != cudaSuccess )
       {
          std::cerr << "Update failed." << std::endl;
          return false;
-      }
-      /*cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ),  cudaMemcpyDeviceToHost );
-      writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
-      getchar();*/
-      
+      }            
       
       cudaThreadSynchronize();
       cudaMemcpy( max_du, cuda_max_du, cudaUpdateBlocks.x * sizeof( Real ), cudaMemcpyDeviceToHost );
@@ -391,12 +394,18 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters,
          cout << "Iteration: " << iteration << "\t Time:" << time << "    \r" << flush;                 
    }
    timer.stop();
+   if( verbose )
+     cout << endl;
+   
    //cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
    //writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
 
    /****
     * Saving the result
     */
+   if( verbose )
+     std::cout << "Saving result..." << std::endl;
+   
    meshFunction.save( "simple-heat-equation-result.tnl" );
    
    /***
@@ -535,6 +544,9 @@ bool solveHeatEquationHost( const Config::ParameterContainer& parameters,
         std::cout << "Iteration: " << iteration << "\t \t Time:" << time << "    \r" << std::flush;
    }
    timer.stop();
+   if( verbose )
+     cout << endl;
+
    
    /****
     * Saving the result