diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
index 583e22478e71a66f9909f589f3ee049b2399ff79..50ea7bde8ee87b24317363feb78e30d45128fa3b 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h
@@ -365,13 +365,13 @@ __global__ void GetNeighbours( const TNL::Containers::Array< int, Devices::Cuda,
     m = i%numBlockX;
     k = i/numBlockX;
     if( m > 0 && blockCalculationIndicator[ i - 1 ] ){
-      pom = 1;//BlockIterPom[ i ] = 1;
+      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
     }else if( m < numBlockX -1 && blockCalculationIndicator[ i + 1 ] ){
-      pom = 1;//BlockIterPom[ i ] = 1;
-    }else if( k > 0 && blockCalculationIndicatorHelp[ i - numBlockX ] ){
-      pom = 1;// BlockIterPom[ i ] = 1;
+      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
+    }else if( k > 0 && blockCalculationIndicator[ i - numBlockX ] ){
+      pom = 1;// blockCalculationIndicatorHelp[ i ] = 1;
     }else if( k < numBlockY -1 && blockCalculationIndicator[ i + numBlockX ] ){
-      pom = 1;//BlockIterPom[ i ] = 1;
+      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
     }
     
     if( blockCalculationIndicator[ i ] != 1 )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
index 91f9a0efef1fd3fedb105db239fb48eaf01e95ff..5b2a4b685b75a4bb616631b139bca7f1215b4c0b 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h
@@ -109,7 +109,7 @@ initInterface( const MeshFunctionPointer& _input,
                 output[ cell.getIndex() ] = pom;
               pom = pom - TNL::sign( c )*hx;
               if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
-                output[ e ] = pom; //( hy * c )/( c - input[ n ]) - hy;
+                output[ e ] = pom; 
               
               interfaceMap[ cell.getIndex() ] = true;
               interfaceMap[ e ] = true;
@@ -122,7 +122,7 @@ initInterface( const MeshFunctionPointer& _input,
                 output[ cell.getIndex() ] = pom;
               pom = pom - TNL::sign( c )*hz;
               if( TNL::abs( output[ t ] ) > TNL::abs( pom ) )
-                output[ t ] = pom; //( hy * c )/( c - input[ n ]) - hy;
+                output[ t ] = pom; 
               
               interfaceMap[ cell.getIndex() ] = true;
               interfaceMap[ t ] = true;
@@ -736,7 +736,7 @@ updateBlocks( const InterfaceMapType interfaceMap,
         MeshFunctionType& helpFunc,
         ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
 {  
-  //#pragma omp parallel for schedule( dynamic )
+  #pragma omp parallel for schedule( dynamic )
   for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
   {
     if( BlockIterHost[ i ] )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
index 66f9e6cdf2efcefb1b5e30b559ae301aa2af54ac..31d3f8b324a7fc2a02d94e54b5909190cacd965d 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
@@ -363,7 +363,7 @@ solve( const MeshPointer& mesh,
            interfaceMapPtr.template getData< Device >(),
            auxPtr.template getData< Device>(),
            helpFunc.template modifyData< Device>(),
-           blockCalculationIndicator,
+           blockCalculationIndicator, vecLowerOverlaps, vecUpperOverlaps,
            oddEvenBlock );
            cudaDeviceSynchronize();
            TNL_CHECK_CUDA_DEVICE;
@@ -381,15 +381,8 @@ solve( const MeshPointer& mesh,
            
            oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
            
-           CudaParallelReduc<<< nBlocks , 1024 >>>( blockCalculationIndicator, dBlock, ( numBlocksX * numBlocksY ) );
-           cudaDeviceSynchronize();
-           TNL_CHECK_CUDA_DEVICE;
-           CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
-           cudaDeviceSynchronize();
-           TNL_CHECK_CUDA_DEVICE;
-           
-           BlockIterD = dBlock.getElement( 0 );*/
-          
+           calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
+          */
   /**------------------------------------------------------------------------------------------------*/
           
           
@@ -441,7 +434,7 @@ solve( const MeshPointer& mesh,
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
           
-          // "Parallel reduction" to see if we should calculate again BlockIterD
+          // "Parallel reduction" to see if we should calculate again calculateCudaBlocksAgain
           calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
           
           // When we change something then we should caclucate again in the next passage of MPI ( calculated = true )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
index 2d73b174edd2ea69e1253f9eff67b9be18bd9b67..4895c7693772a9f33458a119506668a493996ec0 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h
@@ -103,8 +103,30 @@ solve( const MeshPointer& mesh,
         calculateMPIAgain = 0;
         
 /** HERE IS FSM FOR OPENMP (NO MPI) - isnt worthy */
-        /*int numThreadsPerBlock = 64;
+        /*int numThreadsPerBlock = -1;
          
+         numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0));
+         //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
+         if( numThreadsPerBlock <= 16 )
+         numThreadsPerBlock = 16;
+         else if(numThreadsPerBlock <= 32 )
+         numThreadsPerBlock = 32;
+         else if(numThreadsPerBlock <= 64 )
+         numThreadsPerBlock = 64;
+         else if(numThreadsPerBlock <= 128 )
+         numThreadsPerBlock = 128;
+         else if(numThreadsPerBlock <= 256 )
+         numThreadsPerBlock = 256;
+         else if(numThreadsPerBlock <= 512 )
+         numThreadsPerBlock = 512;
+         else
+         numThreadsPerBlock = 1024;
+         //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
+         
+         if( numThreadsPerBlock == -1 ){
+            printf("Fail in setting numThreadsPerBlock.\n");
+         break;
+         }
          
          int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0);
          int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0);
@@ -140,8 +162,22 @@ solve( const MeshPointer& mesh,
          helpFunc1 = auxPtr;
          auxPtr = helpFunc;
          helpFunc = helpFunc1;
+         switch ( numThreadsPerBlock ){
+         case 16:
+         this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         case 32:
+         this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         case 64:
          this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
-         
+         case 128:
+         this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         case 256:
+         this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         case 512:
+         this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         default:
+         this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
+         }
          //Reduction      
          for( int i = 0; i < BlockIterHost.getSize(); i++ ){
          if( IsCalculationDone == 0 ){
@@ -176,43 +212,43 @@ solve( const MeshPointer& mesh,
     // TOP, NORTH and WEST        
         boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
         boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
-        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
+        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
         
     // TOP, SOUTH and EAST        
         boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
-        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
+        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
         boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
         
     // TOP, SOUTH and WEST        
         boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
-        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
-        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; 
+        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
+        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; 
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
             
     // BOTTOM, NOTH and EAST        
-        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
+        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
         boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
         boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); 
         
     // BOTTOM, NOTH and WEST        
-        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
+        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
         boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
-        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; 
+        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; 
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
         
     // BOTTOM, SOUTH and EAST        
-        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
-        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
+        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
+        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
         boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );    
         
     // BOTTOM, SOUTH and WEST        
-        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
-        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
-        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
+        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
+        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
+        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
         goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );