diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
index 979c70e7b5d48ec7b8bedcbbcab562dc72b72857..fe7d4b9ba8171c2beb52486b1e177fb087abc850 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
@@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >
       
       template< typename MeshEntity >
       __cuda_callable__ void updateCell( MeshFunctionType& u,
-                                         const MeshEntity& cell );
+                                         const MeshEntity& cell,
+                                         const RealType velocity = 1.0  );
       
+      __cuda_callable__ bool updateCell( volatile Real sArray[18],
+                                         int thri, const Real h,
+                                         const Real velocity = 1.0 );
 };
 
 
@@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
       __cuda_callable__ bool updateCell( volatile Real sArray[18][18],
                                          int thri, int thrj, const Real hx, const Real hy,
                                          const Real velocity = 1.0 );
-      
-      __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18],
-                                            int dimX, int dimY, int i, int j );
-      
-      /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18],
-                                            int dimX, int dimY, int i, int j );*/
 };
 
 template< typename Real,
@@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] );
 
 
 #ifdef HAVE_CUDA
+template < typename Real, typename Device, typename Index >
+__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, 
+                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
+                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap  );
+
+template < typename Real, typename Device, typename Index >
+__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
+                                      const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
+                                      Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
+                                      bool *BlockIterDevice );
+
 template < typename Real, typename Device, typename Index >
 __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
                                       const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
index 9ec95c25f686a8cda33ae034f7084110d27562cc..c742a11b501c818beccf88b94c1671b133c4b8cb 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
@@ -21,45 +21,72 @@ initInterface( const MeshFunctionPointer& _input,
                MeshFunctionPointer& _output,
                InterfaceMapPointer& _interfaceMap  )
 {
-   const MeshType& mesh = _input->getMesh();
-   typedef typename MeshType::Cell Cell;
-   const MeshFunctionType& input = _input.getData();
-   MeshFunctionType& output = _output.modifyData();
-   InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
-   Cell cell( mesh );
-   for( cell.getCoordinates().x() = 1;
-        cell.getCoordinates().x() < mesh.getDimensions().x() - 1;
-        cell.getCoordinates().x() ++ )
-   {
-      cell.refresh();
-      const RealType& c = input( cell );      
-      if( ! cell.isBoundaryEntity()  )
-      {
-         const auto& neighbors = cell.getNeighborEntities();
-         const RealType& h = mesh.getSpaceSteps().x();
-         //const IndexType& c = cell.getIndex();
-         const IndexType e = neighbors.template getEntityIndex<  1 >();
-         const IndexType w = neighbors.template getEntityIndex< -1 >();
-         if( c * input[ e ] <=0 )
-         {
-             output[ cell.getIndex() ] =
-             c >= 0 ? ( h * c )/( c - input[ e ] ) :
-                      - ( h * c )/( c - input[ e ] );
-             interfaceMap[ cell.getIndex() ] = true;
-         }
-         if( c * input[ w ] <=0 )
-         {
-             output[ cell.getIndex() ] =
-             c >= 0 ? ( h * c )/( c - input[ w ] ) :
-                      - ( h * c )/( c - input[ w ] );
-             interfaceMap[ cell.getIndex() ] = true;
-         }
-      }
-      output[ cell.getIndex() ] =
-      c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() :
-             -TypeInfo< RealType >::getMaxValue();
-      interfaceMap[ cell.getIndex() ] = false;
-   }
+    if( std::is_same< Device, Devices::Cuda >::value )
+    {
+#ifdef HAVE_CUDA
+        const MeshType& mesh = _input->getMesh();
+        
+        const int cudaBlockSize( 16 );
+        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
+        dim3 blockSize( cudaBlockSize );
+        dim3 gridSize( numBlocksX );
+        Devices::Cuda::synchronizeDevice();
+        CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(),
+                                                   _output.template modifyData< Device >(),
+                                                   _interfaceMap.template modifyData< Device >() );
+        cudaDeviceSynchronize();
+        TNL_CHECK_CUDA_DEVICE;
+#endif
+    }
+    if( std::is_same< Device, Devices::Host >::value )
+    {
+        const MeshType& mesh = _input->getMesh();
+        typedef typename MeshType::Cell Cell;
+        const MeshFunctionType& input = _input.getData();
+        MeshFunctionType& output = _output.modifyData();
+        InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
+        Cell cell( mesh );
+        for( cell.getCoordinates().x() = 0;
+            cell.getCoordinates().x() < mesh.getDimensions().x();
+            cell.getCoordinates().x() ++ )
+           {
+               cell.refresh();
+               output[ cell.getIndex() ] =
+               input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() :
+                                  - TypeInfo< RealType >::getMaxValue();
+               interfaceMap[ cell.getIndex() ] = false;
+           }
+        
+        
+        const RealType& h = mesh.getSpaceSteps().x();
+        for( cell.getCoordinates().x() = 0;
+             cell.getCoordinates().x() < mesh.getDimensions().x() - 1;
+             cell.getCoordinates().x() ++ )
+        {
+           cell.refresh();
+           const RealType& c = input( cell );      
+           if( ! cell.isBoundaryEntity()  )
+           {
+              const auto& neighbors = cell.getNeighborEntities();
+              Real pom = 0;
+              //const IndexType& c = cell.getIndex();
+              const IndexType e = neighbors.template getEntityIndex<  1 >();
+              if( c * input[ e ] <= 0 )
+              {
+                pom = TNL::sign( c )*( h * c )/( c - input[ e ]);
+                if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) )
+                    output[ cell.getIndex() ] = pom;
+
+                pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx;
+                if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
+                    output[ e ] = pom; 
+
+                interfaceMap[ cell.getIndex() ] = true;
+                interfaceMap[ e ] = true;
+              }
+           }
+        }
+    }
 }
 
 template< typename Real,
@@ -69,8 +96,31 @@ template< typename Real,
 void
 tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
 updateCell( MeshFunctionType& u,
-            const MeshEntity& cell )
+            const MeshEntity& cell, 
+            const RealType v )
 {
+    const auto& neighborEntities = cell.template getNeighborEntities< 1 >();
+    const MeshType& mesh = cell.getMesh();
+    const RealType& h = mesh.getSpaceSteps().x();
+    const RealType value = u( cell );
+    RealType a, tmp = TypeInfo< RealType >::getMaxValue();
+
+    if( cell.getCoordinates().x() == 0 )
+       a = u[ neighborEntities.template getEntityIndex< 1 >() ];
+    else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 )
+       a = u[ neighborEntities.template getEntityIndex< -1 >() ];
+    else
+    {
+       a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ],
+                           u[ neighborEntities.template getEntityIndex<  1 >() ] );
+    }
+
+    if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
+      return;
+   
+    tmp = a + TNL::sign( value ) * h/v;
+    
+    u[ cell.getIndex() ] = argAbsMin( value, tmp );
 }
 
 template< typename Real,
@@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input,
     }
     if( std::is_same< Device, Devices::Host >::value )
     {
-         const MeshFunctionType& input = _input.getData();
+        MeshFunctionType input = _input.getData();
+        
+        double A[320][320];
+        std::ifstream fileInit("/home/maty/Downloads/initData.txt");
+
+        for (int i = 0; i < 320; i++)
+            for (int j = 0; j < 320; j++)
+                fileInit >> A[i][j];
+        fileInit.close();
+        for (int i = 0; i < 320; i++)
+            for (int j = 0; j < 320; j++)
+                input[i*320 + j] = A[i][j];
+        
+        
          MeshFunctionType& output = _output.modifyData();
          InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
         const MeshType& mesh = input.getMesh();
@@ -186,7 +249,7 @@ initInterface( const MeshFunctionPointer& _input,
 
                         pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx;
                         if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
-                            output[ e ] = pom;                         
+                            output[ e ] = pom; 
                     /*}else
                     {
                         pom = - (hx * c)/( c - input[ e ]);
@@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] )
     }   
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__ void
-tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
-setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy )
-{
-    int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0);
-    int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0);
-    int xkolik = 18;
-    int ykolik = 18;
-    int xOd = 0;
-    int yOd = 0;
-    
-    if( blockIdx == 0 )
-        xOd = 1;
-    if( blockIdy == 0 )
-        yOd = 1;
-    
-    if( numOfBlockx - 1 == blockIdx )
-        xkolik = dimX - (numOfBlockx-1)*16+1;
 
-    if( numOfBlocky -1 == blockIdy )
-        ykolik = dimY - (numOfBlocky-1)*16+1;
+
+#ifdef HAVE_CUDA
+template < typename Real, typename Device, typename Index >
+__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, 
+                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
+                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap  )
+{
+    int i = threadIdx.x + blockDim.x*blockIdx.x;
+    const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >();
     
-    if( dimX > (blockIdx+1) * 16 )
-    {
-        for( int i = yOd; i < ykolik; i++ )
-        {
-            sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ];
-        }
-    }
-    if( blockIdx != 0 )
-    {
-        for( int i = yOd; i < ykolik; i++ )
-        {
-            sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0];
-        }
-    } 
-    if( dimY > (blockIdy+1) * 16 )
+    if( i < mesh.getDimensions().x()  )
     {
-        for( int i = xOd; i < xkolik; i++ )
+        typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell;
+        Cell cell( mesh );
+        cell.getCoordinates().x() = i;
+        cell.refresh();
+        const Index cind = cell.getIndex();
+
+
+        output[ cind ] =
+               input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() :
+                                    - TypeInfo< Real >::getMaxValue();
+        interfaceMap[ cind ] = false; 
+
+        const Real& h = mesh.getSpaceSteps().x();
+        cell.refresh();
+        const Real& c = input( cell );
+        if( ! cell.isBoundaryEntity()  )
         {
-            sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ];
+           auto neighbors = cell.getNeighborEntities();
+           Real pom = 0;
+           const Index e = neighbors.template getEntityIndex< 1 >();
+           const Index w = neighbors.template getEntityIndex< -1 >();
+           if( c * input[ e ] <= 0 )
+           {
+               pom = TNL::sign( c )*( h * c )/( c - input[ e ]);
+               if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) )
+                   output[ cind ] = pom;                       
+
+               interfaceMap[ cind ] = true;
+           }
+           if( c * input[ w ] <= 0 )
+           {
+               pom = TNL::sign( c )*( h * c )/( c - input[ w ]);
+               if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) 
+                   output[ cind ] = pom;
+
+               interfaceMap[ cind ] = true;
+           }
         }
     }
-    if( blockIdy != 0 )
-    {
-        for( int i = xOd; i < xkolik; i++ )
-        {
-            sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ];
-        }
-    }   
+           
 }
-
-
-
-#ifdef HAVE_CUDA
 template < typename Real, typename Device, typename Index >
 __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                 Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
@@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
     {
         sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
         tmp = value - sArray[ thrj ][ thri ];
-        if ( fabs( tmp ) >  0.001 )
+        if ( fabs( tmp ) >  0.01*hx )
             return true;
         else
             return false;
@@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
             ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
         sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
         tmp = value - sArray[ thrj ][ thri ];
-        if ( fabs( tmp ) > 0.01 )
+        if ( fabs( tmp ) > 0.01*hx )
             return true;
         else
             return false;
@@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
     
     return false;
 }
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool
+tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
+updateCell( volatile Real sArray[18], int thri, const Real h, const Real v )
+{
+   const RealType value = sArray[ thri ];
+   RealType a, tmp = TypeInfo< RealType >::getMaxValue();
+      
+   a = TNL::argAbsMin( sArray[ thri+1 ],
+                       sArray[ thri-1 ] );
+
+    if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
+       return false;
+   
+    tmp = a + TNL::sign( value ) * h/v;
+    
+                                
+    sArray[ thri ] = argAbsMin( value, tmp );
+    
+    tmp = value - sArray[ thri ];
+    if ( fabs( tmp ) >  0.01*h )
+        return true;
+    else
+        return false;
+}
 #endif
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h
index 57ac7d6896f4db6dc91389af45d9d7c6cef43ec2..d1eb8fe2e82caa6b3111da3f8d6ce9be3ca7bf3c 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h
@@ -125,7 +125,7 @@ bool
 tnlDirectEikonalProblem< Mesh, Anisotropy, Real, Index >::
 solve( const MeshPointer& mesh,
        DofVectorPointer& dofs )
-{
+{   
    FastSweepingMethod< MeshType, AnisotropyType > fsm;
    fsm.solve( mesh, anisotropy, initialData );
    return true;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
index 105f1ceda9b7af99e896e9aadae910330a543004..87974c6847bbc60e60177b33c5884eca3f2ef0ec 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h
@@ -57,14 +57,191 @@ FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy >::
 solve( const MeshPointer& mesh,
        const AnisotropyPointer& anisotropy,
        MeshFunctionPointer& u )
-{
-   MeshFunctionPointer aux;
-   InterfaceMapPointer interfaceMap;
-   aux->setMesh( mesh );
-   interfaceMap->setMesh( mesh );
+{   
+   MeshFunctionPointer auxPtr;
+   InterfaceMapPointer interfaceMapPtr;
+   auxPtr->setMesh( mesh );
+   interfaceMapPtr->setMesh( mesh );
    std::cout << "Initiating the interface cells ..." << std::endl;
-   BaseType::initInterface( u, aux, interfaceMap );
-   aux->save( "aux-ini.tnl" );
+   BaseType::initInterface( u, auxPtr, interfaceMapPtr );
+        
+   auxPtr->save( "aux-ini.tnl" );
+
+   typename MeshType::Cell cell( *mesh );
+   
+   IndexType iteration( 0 );
+   InterfaceMapType interfaceMap = *interfaceMapPtr;
+   MeshFunctionType aux = *auxPtr;
+   while( iteration < this->maxIterations )
+   {
+      if( std::is_same< DeviceType, Devices::Host >::value )
+      {
+          for( cell.getCoordinates().x() = 0;
+               cell.getCoordinates().x() < mesh->getDimensions().x();
+               cell.getCoordinates().x()++ )
+             {
+                cell.refresh();
+                if( ! interfaceMap( cell ) )
+                   this->updateCell( aux, cell );
+             }
+
+         
+        for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
+             cell.getCoordinates().x() >= 0 ;
+             cell.getCoordinates().x()-- )		
+           {
+              cell.refresh();
+              if( ! interfaceMap( cell ) )            
+                 this->updateCell( aux, cell );
+           }
+      }
+      if( std::is_same< DeviceType, Devices::Cuda >::value )
+      {
+         // TODO: CUDA code
+#ifdef HAVE_CUDA
+          const int cudaBlockSize( 16 );
+          int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
+          dim3 blockSize( cudaBlockSize );
+          dim3 gridSize( numBlocksX );
+          
+          tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr;
+          
+          
+          
+          bool* BlockIter = (bool*)malloc( ( numBlocksX ) * sizeof( bool ) );
+          
+          bool *BlockIterDevice;
+          cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) );
+          
+          while( BlockIter[ 0 ] )
+          {
+           for( int i = 0; i < numBlocksX; i++ )
+                BlockIter[ i ] = false;
+           cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice);
+                       
+            CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
+                                                             interfaceMapPtr.template getData< Device >(),
+                                                             auxPtr.template modifyData< Device>(),
+                                                             BlockIterDevice );
+            cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost);
+                                   
+            for( int i = 1; i < numBlocksX; i++ )
+                BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];
+            
+          }
+          delete[] BlockIter;
+          cudaFree( BlockIterDevice );
+          cudaDeviceSynchronize();
+          
+          TNL_CHECK_CUDA_DEVICE;
+              
+          aux = *auxPtr;
+          interfaceMap = *interfaceMapPtr;
+#endif
+      }
+      iteration++;
+   }
+   
+   aux.save("aux-final.tnl");
+}
+
+#ifdef HAVE_CUDA
+template < typename Real, typename Device, typename Index >
+__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
+                                      const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
+                                      Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
+                                      bool *BlockIterDevice )
+{
+    int thri = threadIdx.x;
+    int blIdx = blockIdx.x;
+    int i = thri + blockDim.x*blIdx;
+    
+    __shared__ volatile bool changed[16];
+    changed[ thri ] = false;
+    
+    if( thri == 0 )
+        changed[ 0 ] = true;
+    
+    const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
+    __shared__ Real h;
+    if( thri == 1 )
+    {
+        h = mesh.getSpaceSteps().x();
+    }
+    
+    __shared__ volatile Real sArray[ 18 ];
+    sArray[thri] = TypeInfo< Real >::getMaxValue();
+    
+    //filling sArray edges
+    int dimX = mesh.getDimensions().x(); 
+    __shared__ volatile int numOfBlockx;
+    __shared__ int xkolik;
+    if( thri == 0 )
+    {
+        xkolik = blockDim.x + 1;
+        numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0);
+    
+        if( numOfBlockx - 1 == blIdx )
+            xkolik = dimX - (blIdx)*blockDim.x+1;
+    }
+    __syncthreads();
+    
+    if( thri == 0 )
+    {        
+        if( dimX > (blIdx+1) * blockDim.x )
+            sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ];
+        else
+            sArray[xkolik] = TypeInfo< Real >::getMaxValue();
+    }
+    
+    if( thri == 1 )
+    {
+        if( blIdx != 0 )
+            sArray[0] = aux[ blIdx*blockDim.x - 1 ];
+        else
+            sArray[0] = TypeInfo< Real >::getMaxValue();
+    }
+    
+        
+    if( i < mesh.getDimensions().x() )
+    {    
+        sArray[thri+1] = aux[ i ];
+    }
+    __syncthreads();  
 
+    while( changed[ 0 ] )
+    {
+        __syncthreads();
+        
+        changed[ thri ] = false;
+        
+    //calculation of update cell
+        if( i < mesh.getDimensions().x() )
+        {
+            if( ! interfaceMap[ i ] )
+            {
+                changed[ thri ] = ptr.updateCell( sArray, thri+1, h );
+            }
+        }
+        __syncthreads();
+        
+        
+        
+    //pyramid reduction
+        if( thri < 8 ) changed[ thri ] = changed[ thri ] || changed[ thri + 8 ];
+        if( thri < 4 ) changed[ thri ] = changed[ thri ] || changed[ thri + 4 ];
+        if( thri < 2 ) changed[ thri ] = changed[ thri ] || changed[ thri + 2 ];
+        if( thri < 1 ) changed[ thri ] = changed[ thri ] || changed[ thri + 1 ];
+        __syncthreads();
+
+        if( changed[ 0 ] && thri == 0 )
+            BlockIterDevice[ blIdx ] = true;
+        __syncthreads();
+    }
+  
+    if( i < mesh.getDimensions().x()  && (!interfaceMap[ i ]) )
+        aux[ i ] = sArray[ thri + 1 ];
 }
+#endif
+
 
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
index e727104086713a7c0e54c55b5a343f06b17c8f42..e5a9c39f30ace9f19f3a5d527550fb8d1db9f6f8 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
@@ -18,13 +18,16 @@
 #include <TNL/Devices/Cuda.h>
 
 
+#include <iostream>
+#include <fstream>
+
 template< typename Real,
           typename Device,
           typename Index,
           typename Anisotropy >
 FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >::
 FastSweepingMethod()
-: maxIterations( 1 )
+: maxIterations( 2 )
 {
    
 }
@@ -61,15 +64,31 @@ solve( const MeshPointer& mesh,
        const AnisotropyPointer& anisotropy,
        MeshFunctionPointer& u )
 {
+   MeshFunctionType v;
+   v.setMesh(mesh);
+   double A[320][320];
+    for (int i = 0; i < 320; i++)
+        for (int j = 0; j < 320; j++)
+            A[i][j] = 0;
+    
+    std::ifstream file("/home/maty/Downloads/mapa2.txt");
+
+    for (int i = 0; i < 320; i++)
+        for (int j = 0; j < 320; j++)
+            file >> A[i][j];
+    file.close();
+    for (int i = 0; i < 320; i++)
+        for (int j = 0; j < 320; j++)
+            v[i*320 + j] = A[i][j];
+   v.save("mapa.tnl");
+   
+       
    MeshFunctionPointer auxPtr;
    InterfaceMapPointer interfaceMapPtr;
    auxPtr->setMesh( mesh );
    interfaceMapPtr->setMesh( mesh );
    std::cout << "Initiating the interface cells ..." << std::endl;
    BaseType::initInterface( u, auxPtr, interfaceMapPtr );
-#ifdef HAVE_CUDA
-   cudaDeviceSynchronize();
-#endif
         
    auxPtr->save( "aux-ini.tnl" );
 
@@ -92,7 +111,7 @@ solve( const MeshPointer& mesh,
                {
                   cell.refresh();
                   if( ! interfaceMap( cell ) )
-                     this->updateCell( aux, cell );
+                     this->updateCell( aux, cell, v( cell ) );
                }
          }
 
@@ -109,7 +128,7 @@ solve( const MeshPointer& mesh,
                   //std::cerr << "2 -> ";
                   cell.refresh();
                   if( ! interfaceMap( cell ) )            
-                     this->updateCell( aux, cell );
+                     this->updateCell( aux, cell, v( cell ) );
                }
          }
 
@@ -126,7 +145,7 @@ solve( const MeshPointer& mesh,
                   //std::cerr << "3 -> ";
                   cell.refresh();
                   if( ! interfaceMap( cell ) )            
-                     this->updateCell( aux, cell );
+                     this->updateCell( aux, cell, v( cell ) );
                }
             }
 
@@ -143,7 +162,7 @@ solve( const MeshPointer& mesh,
                   //std::cerr << "4 -> ";
                   cell.refresh();
                   if( ! interfaceMap( cell ) )            
-                     this->updateCell( aux, cell );
+                     this->updateCell( aux, cell, v( cell ) );
                }
             }
 
@@ -222,7 +241,6 @@ solve( const MeshPointer& mesh,
           int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
           dim3 blockSize( cudaBlockSize, cudaBlockSize );
           dim3 gridSize( numBlocksX, numBlocksY );
-          Devices::Cuda::synchronizeDevice();
           
           tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;
           
@@ -243,20 +261,8 @@ solve( const MeshPointer& mesh,
                                                              interfaceMapPtr.template getData< Device >(),
                                                              auxPtr.template modifyData< Device>(),
                                                              BlockIterDevice );
-            cudaDeviceSynchronize();         
-            TNL_CHECK_CUDA_DEVICE;
             cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost);
-            
-            /*for( int j = numBlocksY-1; j > -1; j-- )
-            {    
-                for( int i = 0; i < numBlocksX; i++ )
-                {
-                    std::cout << BlockIter[ j * numBlocksY + i ] << "\t";  
-                }
-                std::cout << std::endl;
-            }
-            std::cout << std::endl;*/
-                         
+                                   
             for( int i = 1; i < numBlocksX * numBlocksY; i++ )
                 BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];
             
@@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
     
     if( thrj == 0 && thri == 0 )
         changed[ 0 ] = true;
-    __syncthreads();
     
     const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
     __shared__ Real hx;
     __shared__ Real hy;
-    if( thrj == 0 && thri == 0 )
+    if( thrj == 1 && thri == 1 )
     {
         hx = mesh.getSpaceSteps().x();
         hy = mesh.getSpaceSteps().y();
@@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
             sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue();
     }
     
-    if( thrj == 0 )
+    if( thri == 2 )
     {
         if( dimY > (blIdy+1) * blockDim.y  && thri+1 < xkolik )
-            sArray[ykolik][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thri+1 ];
+            sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ];
         else
-           sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue();
+           sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue();
     }
     
-    if( thrj == 1 )
+    if( thri == 3 )
     {
         if( blIdy != 0 && thri+1 < xkolik )
-            sArray[0][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thri+1 ];
+            sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ];
         else
-            sArray[0][thri+1] = TypeInfo< Real >::getMaxValue();
+            sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue();
     }
     
         
@@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
             if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ];
         }
         if( changed[ 0 ] && thri == 0 && thrj == 0 )
-            BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ];
+            BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true;
         __syncthreads();
     }