Commit c822988e authored by Matouš Fencl's avatar Matouš Fencl
Browse files

Implementation of 1D solver is done for both CPU and GPU. 2D solver is now working fine

parent b387101c
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >
      
      template< typename MeshEntity >
      __cuda_callable__ void updateCell( MeshFunctionType& u,
                                         const MeshEntity& cell );
                                         const MeshEntity& cell,
                                         const RealType velocity = 1.0  );
      
      __cuda_callable__ bool updateCell( volatile Real sArray[18],
                                         int thri, const Real h,
                                         const Real velocity = 1.0 );
};


@@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
      __cuda_callable__ bool updateCell( volatile Real sArray[18][18],
                                         int thri, int thrj, const Real hx, const Real hy,
                                         const Real velocity = 1.0 );
      
      __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18],
                                            int dimX, int dimY, int i, int j );
      
      /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18],
                                            int dimX, int dimY, int i, int j );*/
};

template< typename Real,
@@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] );


#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap  );

template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
                                      bool *BlockIterDevice );

template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
+183 −95
Original line number Diff line number Diff line
@@ -20,6 +20,25 @@ tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
initInterface( const MeshFunctionPointer& _input,
               MeshFunctionPointer& _output,
               InterfaceMapPointer& _interfaceMap  )
{
    if( std::is_same< Device, Devices::Cuda >::value )
    {
#ifdef HAVE_CUDA
        const MeshType& mesh = _input->getMesh();
        
        const int cudaBlockSize( 16 );
        int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
        dim3 blockSize( cudaBlockSize );
        dim3 gridSize( numBlocksX );
        Devices::Cuda::synchronizeDevice();
        CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(),
                                                   _output.template modifyData< Device >(),
                                                   _interfaceMap.template modifyData< Device >() );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
#endif
    }
    if( std::is_same< Device, Devices::Host >::value )
    {
        const MeshType& mesh = _input->getMesh();
        typedef typename MeshType::Cell Cell;
@@ -27,7 +46,20 @@ initInterface( const MeshFunctionPointer& _input,
        MeshFunctionType& output = _output.modifyData();
        InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
        Cell cell( mesh );
   for( cell.getCoordinates().x() = 1;
        for( cell.getCoordinates().x() = 0;
            cell.getCoordinates().x() < mesh.getDimensions().x();
            cell.getCoordinates().x() ++ )
           {
               cell.refresh();
               output[ cell.getIndex() ] =
               input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() :
                                  - TypeInfo< RealType >::getMaxValue();
               interfaceMap[ cell.getIndex() ] = false;
           }
        
        
        const RealType& h = mesh.getSpaceSteps().x();
        for( cell.getCoordinates().x() = 0;
             cell.getCoordinates().x() < mesh.getDimensions().x() - 1;
             cell.getCoordinates().x() ++ )
        {
@@ -36,29 +68,24 @@ initInterface( const MeshFunctionPointer& _input,
           if( ! cell.isBoundaryEntity()  )
           {
              const auto& neighbors = cell.getNeighborEntities();
         const RealType& h = mesh.getSpaceSteps().x();
              Real pom = 0;
              //const IndexType& c = cell.getIndex();
              const IndexType e = neighbors.template getEntityIndex<  1 >();
         const IndexType w = neighbors.template getEntityIndex< -1 >();
              if( c * input[ e ] <= 0 )
              {
             output[ cell.getIndex() ] =
             c >= 0 ? ( h * c )/( c - input[ e ] ) :
                      - ( h * c )/( c - input[ e ] );
                pom = TNL::sign( c )*( h * c )/( c - input[ e ]);
                if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) )
                    output[ cell.getIndex() ] = pom;

                pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx;
                if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
                    output[ e ] = pom; 

                interfaceMap[ cell.getIndex() ] = true;
                interfaceMap[ e ] = true;
              }
         if( c * input[ w ] <=0 )
         {
             output[ cell.getIndex() ] =
             c >= 0 ? ( h * c )/( c - input[ w ] ) :
                      - ( h * c )/( c - input[ w ] );
             interfaceMap[ cell.getIndex() ] = true;
           }
        }
      output[ cell.getIndex() ] =
      c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() :
             -TypeInfo< RealType >::getMaxValue();
      interfaceMap[ cell.getIndex() ] = false;
    }
}

@@ -69,8 +96,31 @@ template< typename Real,
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
updateCell( MeshFunctionType& u,
            const MeshEntity& cell )
            const MeshEntity& cell, 
            const RealType v )
{
    const auto& neighborEntities = cell.template getNeighborEntities< 1 >();
    const MeshType& mesh = cell.getMesh();
    const RealType& h = mesh.getSpaceSteps().x();
    const RealType value = u( cell );
    RealType a, tmp = TypeInfo< RealType >::getMaxValue();

    if( cell.getCoordinates().x() == 0 )
       a = u[ neighborEntities.template getEntityIndex< 1 >() ];
    else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 )
       a = u[ neighborEntities.template getEntityIndex< -1 >() ];
    else
    {
       a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ],
                           u[ neighborEntities.template getEntityIndex<  1 >() ] );
    }

    if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
      return;
   
    tmp = a + TNL::sign( value ) * h/v;
    
    u[ cell.getIndex() ] = argAbsMin( value, tmp );
}

template< typename Real,
@@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input,
    }
    if( std::is_same< Device, Devices::Host >::value )
    {
         const MeshFunctionType& input = _input.getData();
        MeshFunctionType input = _input.getData();
        
        double A[320][320];
        std::ifstream fileInit("/home/maty/Downloads/initData.txt");

        for (int i = 0; i < 320; i++)
            for (int j = 0; j < 320; j++)
                fileInit >> A[i][j];
        fileInit.close();
        for (int i = 0; i < 320; i++)
            for (int j = 0; j < 320; j++)
                input[i*320 + j] = A[i][j];
        
        
         MeshFunctionType& output = _output.modifyData();
         InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
        const MeshType& mesh = input.getMesh();
@@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] )
    }   
}

template< typename Real,
          typename Device,
          typename Index >
__cuda_callable__ void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy )


#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
                                Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap  )
{
    int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0);
    int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0);
    int xkolik = 18;
    int ykolik = 18;
    int xOd = 0;
    int yOd = 0;
    int i = threadIdx.x + blockDim.x*blockIdx.x;
    const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >();
    
    if( blockIdx == 0 )
        xOd = 1;
    if( blockIdy == 0 )
        yOd = 1;
    if( i < mesh.getDimensions().x()  )
    {
        typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell;
        Cell cell( mesh );
        cell.getCoordinates().x() = i;
        cell.refresh();
        const Index cind = cell.getIndex();

    if( numOfBlockx - 1 == blockIdx )
        xkolik = dimX - (numOfBlockx-1)*16+1;

    if( numOfBlocky -1 == blockIdy )
        ykolik = dimY - (numOfBlocky-1)*16+1;
        output[ cind ] =
               input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() :
                                    - TypeInfo< Real >::getMaxValue();
        interfaceMap[ cind ] = false; 

    if( dimX > (blockIdx+1) * 16 )
    {
        for( int i = yOd; i < ykolik; i++ )
        {
            sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ];
        }
    }
    if( blockIdx != 0 )
    {
        for( int i = yOd; i < ykolik; i++ )
        {
            sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0];
        }
    } 
    if( dimY > (blockIdy+1) * 16 )
        const Real& h = mesh.getSpaceSteps().x();
        cell.refresh();
        const Real& c = input( cell );
        if( ! cell.isBoundaryEntity()  )
        {
        for( int i = xOd; i < xkolik; i++ )
           auto neighbors = cell.getNeighborEntities();
           Real pom = 0;
           const Index e = neighbors.template getEntityIndex< 1 >();
           const Index w = neighbors.template getEntityIndex< -1 >();
           if( c * input[ e ] <= 0 )
           {
            sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ];
        }
               pom = TNL::sign( c )*( h * c )/( c - input[ e ]);
               if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) )
                   output[ cind ] = pom;                       

               interfaceMap[ cind ] = true;
           }
    if( blockIdy != 0 )
    {
        for( int i = xOd; i < xkolik; i++ )
           if( c * input[ w ] <= 0 )
           {
            sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ];
               pom = TNL::sign( c )*( h * c )/( c - input[ w ]);
               if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) 
                   output[ cind ] = pom;

               interfaceMap[ cind ] = true;
           }
        }
    }
           


#ifdef HAVE_CUDA
}
template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
@@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
    {
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
        if ( fabs( tmp ) >  0.001 )
        if ( fabs( tmp ) >  0.01*hx )
            return true;
        else
            return false;
@@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
            ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
        sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
        tmp = value - sArray[ thrj ][ thri ];
        if ( fabs( tmp ) > 0.01 )
        if ( fabs( tmp ) > 0.01*hx )
            return true;
        else
            return false;
@@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
    
    return false;
}

template< typename Real,
          typename Device,
          typename Index >
__cuda_callable__
bool
tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
updateCell( volatile Real sArray[18], int thri, const Real h, const Real v )
{
   const RealType value = sArray[ thri ];
   RealType a, tmp = TypeInfo< RealType >::getMaxValue();
      
   a = TNL::argAbsMin( sArray[ thri+1 ],
                       sArray[ thri-1 ] );

    if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
       return false;
   
    tmp = a + TNL::sign( value ) * h/v;
    
                                
    sArray[ thri ] = argAbsMin( value, tmp );
    
    tmp = value - sArray[ thri ];
    if ( fabs( tmp ) >  0.01*h )
        return true;
    else
        return false;
}
#endif
+184 −7
Original line number Diff line number Diff line
@@ -58,13 +58,190 @@ solve( const MeshPointer& mesh,
       const AnisotropyPointer& anisotropy,
       MeshFunctionPointer& u )
{   
   MeshFunctionPointer aux;
   InterfaceMapPointer interfaceMap;
   aux->setMesh( mesh );
   interfaceMap->setMesh( mesh );
   MeshFunctionPointer auxPtr;
   InterfaceMapPointer interfaceMapPtr;
   auxPtr->setMesh( mesh );
   interfaceMapPtr->setMesh( mesh );
   std::cout << "Initiating the interface cells ..." << std::endl;
   BaseType::initInterface( u, aux, interfaceMap );
   aux->save( "aux-ini.tnl" );
   BaseType::initInterface( u, auxPtr, interfaceMapPtr );
        
   auxPtr->save( "aux-ini.tnl" );

   typename MeshType::Cell cell( *mesh );
   
   IndexType iteration( 0 );
   InterfaceMapType interfaceMap = *interfaceMapPtr;
   MeshFunctionType aux = *auxPtr;
   while( iteration < this->maxIterations )
   {
      if( std::is_same< DeviceType, Devices::Host >::value )
      {
          for( cell.getCoordinates().x() = 0;
               cell.getCoordinates().x() < mesh->getDimensions().x();
               cell.getCoordinates().x()++ )
             {
                cell.refresh();
                if( ! interfaceMap( cell ) )
                   this->updateCell( aux, cell );
             }

         
        for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
             cell.getCoordinates().x() >= 0 ;
             cell.getCoordinates().x()-- )		
           {
              cell.refresh();
              if( ! interfaceMap( cell ) )            
                 this->updateCell( aux, cell );
           }
      }
      if( std::is_same< DeviceType, Devices::Cuda >::value )
      {
         // TODO: CUDA code
#ifdef HAVE_CUDA
          const int cudaBlockSize( 16 );
          int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
          dim3 blockSize( cudaBlockSize );
          dim3 gridSize( numBlocksX );
          
          tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr;
          
          
          
          bool* BlockIter = (bool*)malloc( ( numBlocksX ) * sizeof( bool ) );
          
          bool *BlockIterDevice;
          cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) );
          
          while( BlockIter[ 0 ] )
          {
           for( int i = 0; i < numBlocksX; i++ )
                BlockIter[ i ] = false;
           cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice);
                       
            CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
                                                             interfaceMapPtr.template getData< Device >(),
                                                             auxPtr.template modifyData< Device>(),
                                                             BlockIterDevice );
            cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost);
                                   
            for( int i = 1; i < numBlocksX; i++ )
                BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];
            
          }
          delete[] BlockIter;
          cudaFree( BlockIterDevice );
          cudaDeviceSynchronize();
          
          TNL_CHECK_CUDA_DEVICE;
              
          aux = *auxPtr;
          interfaceMap = *interfaceMapPtr;
#endif
      }
      iteration++;
   }
   
   aux.save("aux-final.tnl");
}

#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
                                      bool *BlockIterDevice )
{
    int thri = threadIdx.x;
    int blIdx = blockIdx.x;
    int i = thri + blockDim.x*blIdx;
    
    __shared__ volatile bool changed[16];
    changed[ thri ] = false;
    
    if( thri == 0 )
        changed[ 0 ] = true;
    
    const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
    __shared__ Real h;
    if( thri == 1 )
    {
        h = mesh.getSpaceSteps().x();
    }
    
    __shared__ volatile Real sArray[ 18 ];
    sArray[thri] = TypeInfo< Real >::getMaxValue();
    
    //filling sArray edges
    int dimX = mesh.getDimensions().x(); 
    __shared__ volatile int numOfBlockx;
    __shared__ int xkolik;
    if( thri == 0 )
    {
        xkolik = blockDim.x + 1;
        numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0);
    
        if( numOfBlockx - 1 == blIdx )
            xkolik = dimX - (blIdx)*blockDim.x+1;
    }
    __syncthreads();
    
    if( thri == 0 )
    {        
        if( dimX > (blIdx+1) * blockDim.x )
            sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ];
        else
            sArray[xkolik] = TypeInfo< Real >::getMaxValue();
    }
    
    if( thri == 1 )
    {
        if( blIdx != 0 )
            sArray[0] = aux[ blIdx*blockDim.x - 1 ];
        else
            sArray[0] = TypeInfo< Real >::getMaxValue();
    }
    
        
    if( i < mesh.getDimensions().x() )
    {    
        sArray[thri+1] = aux[ i ];
    }
    __syncthreads();  

    while( changed[ 0 ] )
    {
        __syncthreads();
        
        changed[ thri ] = false;
        
    //calculation of update cell
        if( i < mesh.getDimensions().x() )
        {
            if( ! interfaceMap[ i ] )
            {
                changed[ thri ] = ptr.updateCell( sArray, thri+1, h );
            }
        }
        __syncthreads();
        
        
        
    //pyramid reduction
        if( thri < 8 ) changed[ thri ] = changed[ thri ] || changed[ thri + 8 ];
        if( thri < 4 ) changed[ thri ] = changed[ thri ] || changed[ thri + 4 ];
        if( thri < 2 ) changed[ thri ] = changed[ thri ] || changed[ thri + 2 ];
        if( thri < 1 ) changed[ thri ] = changed[ thri ] || changed[ thri + 1 ];
        __syncthreads();

        if( changed[ 0 ] && thri == 0 )
            BlockIterDevice[ blIdx ] = true;
        __syncthreads();
    }
  
    if( i < mesh.getDimensions().x()  && (!interfaceMap[ i ]) )
        aux[ i ] = sArray[ thri + 1 ];
}
#endif

+36 −31
Original line number Diff line number Diff line
@@ -18,13 +18,16 @@
#include <TNL/Devices/Cuda.h>


#include <iostream>
#include <fstream>

template< typename Real,
          typename Device,
          typename Index,
          typename Anisotropy >
FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >::
FastSweepingMethod()
: maxIterations( 1 )
: maxIterations( 2 )
{
   
}
@@ -61,15 +64,31 @@ solve( const MeshPointer& mesh,
       const AnisotropyPointer& anisotropy,
       MeshFunctionPointer& u )
{
   MeshFunctionType v;
   v.setMesh(mesh);
   double A[320][320];
    for (int i = 0; i < 320; i++)
        for (int j = 0; j < 320; j++)
            A[i][j] = 0;
    
    std::ifstream file("/home/maty/Downloads/mapa2.txt");

    for (int i = 0; i < 320; i++)
        for (int j = 0; j < 320; j++)
            file >> A[i][j];
    file.close();
    for (int i = 0; i < 320; i++)
        for (int j = 0; j < 320; j++)
            v[i*320 + j] = A[i][j];
   v.save("mapa.tnl");
   
       
   MeshFunctionPointer auxPtr;
   InterfaceMapPointer interfaceMapPtr;
   auxPtr->setMesh( mesh );
   interfaceMapPtr->setMesh( mesh );
   std::cout << "Initiating the interface cells ..." << std::endl;
   BaseType::initInterface( u, auxPtr, interfaceMapPtr );
#ifdef HAVE_CUDA
   cudaDeviceSynchronize();
#endif
        
   auxPtr->save( "aux-ini.tnl" );

@@ -92,7 +111,7 @@ solve( const MeshPointer& mesh,
               {
                  cell.refresh();
                  if( ! interfaceMap( cell ) )
                     this->updateCell( aux, cell );
                     this->updateCell( aux, cell, v( cell ) );
               }
         }

@@ -109,7 +128,7 @@ solve( const MeshPointer& mesh,
                  //std::cerr << "2 -> ";
                  cell.refresh();
                  if( ! interfaceMap( cell ) )            
                     this->updateCell( aux, cell );
                     this->updateCell( aux, cell, v( cell ) );
               }
         }

@@ -126,7 +145,7 @@ solve( const MeshPointer& mesh,
                  //std::cerr << "3 -> ";
                  cell.refresh();
                  if( ! interfaceMap( cell ) )            
                     this->updateCell( aux, cell );
                     this->updateCell( aux, cell, v( cell ) );
               }
            }

@@ -143,7 +162,7 @@ solve( const MeshPointer& mesh,
                  //std::cerr << "4 -> ";
                  cell.refresh();
                  if( ! interfaceMap( cell ) )            
                     this->updateCell( aux, cell );
                     this->updateCell( aux, cell, v( cell ) );
               }
            }

@@ -222,7 +241,6 @@ solve( const MeshPointer& mesh,
          int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
          dim3 blockSize( cudaBlockSize, cudaBlockSize );
          dim3 gridSize( numBlocksX, numBlocksY );
          Devices::Cuda::synchronizeDevice();
          
          tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;
          
@@ -243,20 +261,8 @@ solve( const MeshPointer& mesh,
                                                             interfaceMapPtr.template getData< Device >(),
                                                             auxPtr.template modifyData< Device>(),
                                                             BlockIterDevice );
            cudaDeviceSynchronize();         
            TNL_CHECK_CUDA_DEVICE;
            cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost);
                                   
            /*for( int j = numBlocksY-1; j > -1; j-- )
            {    
                for( int i = 0; i < numBlocksX; i++ )
                {
                    std::cout << BlockIter[ j * numBlocksY + i ] << "\t";  
                }
                std::cout << std::endl;
            }
            std::cout << std::endl;*/
                         
            for( int i = 1; i < numBlocksX * numBlocksY; i++ )
                BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];
            
@@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
    
    if( thrj == 0 && thri == 0 )
        changed[ 0 ] = true;
    __syncthreads();
    
    const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
    __shared__ Real hx;
    __shared__ Real hy;
    if( thrj == 0 && thri == 0 )
    if( thrj == 1 && thri == 1 )
    {
        hx = mesh.getSpaceSteps().x();
        hy = mesh.getSpaceSteps().y();
@@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
            sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue();
    }
    
    if( thrj == 0 )
    if( thri == 2 )
    {
        if( dimY > (blIdy+1) * blockDim.y  && thri+1 < xkolik )
            sArray[ykolik][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thri+1 ];
            sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ];
        else
           sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue();
           sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue();
    }
    
    if( thrj == 1 )
    if( thri == 3 )
    {
        if( blIdy != 0 && thri+1 < xkolik )
            sArray[0][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thri+1 ];
            sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ];
        else
            sArray[0][thri+1] = TypeInfo< Real >::getMaxValue();
            sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue();
    }
    
        
@@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
            if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ];
        }
        if( changed[ 0 ] && thri == 0 && thrj == 0 )
            BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ];
            BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true;
        __syncthreads();
    }