diff --git a/src/mesh/tnlTraverser_Grid2D_impl.h b/src/mesh/tnlTraverser_Grid2D_impl.h
index 228aabca482f4850ae8c8bbd621859b23a538038..4d8efa2df82898134c560b3c9458f6d71b313dfe 100644
--- a/src/mesh/tnlTraverser_Grid2D_impl.h
+++ b/src/mesh/tnlTraverser_Grid2D_impl.h
@@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid,
    /****
     * Traversing boundary vertices
     */
+   CoordinatesType coordinates;
+   const IndexType& xSize = grid.getDimensions().x();
+   const IndexType& ySize = grid.getDimensions().y();
+
+   for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
+   {
+      coordinates.y() = 0;
+      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+      coordinates.y() = ySize;
+      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+   }
+   for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ )
+   {
+      coordinates.x() = 0;
+      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+      coordinates.x() = xSize;
+      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+   }
+   
 }
 
 template< typename Real,
@@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid,
    /****
     * Traversing interior vertices
     */
-}
+   CoordinatesType coordinates;
+   const IndexType& xSize = grid.getDimensions().x();
+   const IndexType& ySize = grid.getDimensions().y();
 
+#ifdef HAVE_OPENMP
+//#pragma omp parallel for
+#endif
+   for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
+      for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
+      {
+         const IndexType index = grid.getVertexIndex( coordinates );
+         EntitiesProcessor::processVertex( grid, userData, index, coordinates );
+      }  
+}
 
 /***
  *
@@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda
    }
 }
 
+template< typename Real,
+          typename Index,
+          typename UserData,
+          typename EntitiesProcessor >
+__global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
+                                                    UserData* userData,
+                                                    const Index gridXIdx,
+                                                    const Index gridYIdx )
+{
+   typedef Real RealType;
+   typedef Index IndexType;
+   typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
+   typedef typename GridType::CoordinatesType CoordinatesType;
+
+   const IndexType& xSize = grid->getDimensions().x();
+   const IndexType& ySize = grid->getDimensions().y();
+
+   CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
+                                      ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );
+
+   if( vertexCoordinates.x() <= grid->getDimensions().x() &&
+       vertexCoordinates.y() <= grid->getDimensions().y() )
+   {
+      if( grid->isBoundaryVertex( vertexCoordinates ) )
+      {
+         EntitiesProcessor::processVertex( *grid,
+                                           *userData,
+                                           grid->getVertexIndex( vertexCoordinates ),
+                                           vertexCoordinates );
+      }
+   }
+}
+
+template< typename Real,
+          typename Index,
+          typename UserData,
+          typename EntitiesProcessor >
+__global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
+                                                    UserData* userData,
+                                                    const Index gridXIdx,
+                                                    const Index gridYIdx )
+{
+   typedef Real RealType;
+   typedef Index IndexType;
+   typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
+   typedef typename GridType::CoordinatesType CoordinatesType;
+
+   const IndexType& xSize = grid->getDimensions().x();
+   const IndexType& ySize = grid->getDimensions().y();
+
+   CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
+                                      ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );
+
+   if( vertexCoordinates.x() <= grid->getDimensions().x() &&
+       vertexCoordinates.y() <= grid->getDimensions().y() )
+   {
+      if( ! grid->isBoundaryVertex( vertexCoordinates ) )
+      {
+         EntitiesProcessor::processVertex( *grid,
+                                           *userData,
+                                           grid->getVertexIndex( vertexCoordinates ),
+                                           vertexCoordinates );
+      }
+   }
+}
 
 
 #endif
@@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
 processBoundaryEntities( const GridType& grid,
                          UserData& userData ) const
 {
+#ifdef HAVE_CUDA
    /****
-    * Boundary interior vertices
+    * Traversing boundary vertices    
     */
+   GridType* kernelGrid = tnlCuda::passToDevice( grid );
+   UserData* kernelUserData = tnlCuda::passToDevice( userData );
+
+   dim3 cudaBlockSize( 16, 16 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
+   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
+   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
+   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
+
+   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
+      {
+         tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
+                                           <<< cudaBlocks, cudaBlockSize >>>
+                                          ( kernelGrid,
+                                            kernelUserData,
+                                            gridXIdx,
+                                            gridYIdx );
+         checkCudaDevice;
+      }
+   cudaThreadSynchronize();   
+#endif
 }
 
 
@@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
 processInteriorEntities( const GridType& grid,
                          UserData& userData ) const
 {
+#ifdef HAVE_CUDA
    /****
-    * Traversing interior vertices
+    * Traversing interior vertices    
     */
+   GridType* kernelGrid = tnlCuda::passToDevice( grid );
+   UserData* kernelUserData = tnlCuda::passToDevice( userData );
+
+   dim3 cudaBlockSize( 16, 16 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
+   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
+   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
+   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
+
+   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
+      {
+         tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
+                                           <<< cudaBlocks, cudaBlockSize >>>
+                                          ( kernelGrid,
+                                            kernelUserData,
+                                            gridXIdx,
+                                            gridYIdx );
+         checkCudaDevice;
+      }
+   cudaThreadSynchronize();   
+#endif
 }
 
 
diff --git a/src/mesh/tnlTraverser_Grid3D_impl.h b/src/mesh/tnlTraverser_Grid3D_impl.h
index 9cd074ca5fbad055e2e0f5c6420df935b767835b..739e12cd5e58f3e459911f118af61fb421e76cca 100644
--- a/src/mesh/tnlTraverser_Grid3D_impl.h
+++ b/src/mesh/tnlTraverser_Grid3D_impl.h
@@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid,
    /****
     * Traversing boundary vertices
     */
+   CoordinatesType coordinates;
+   const IndexType& xSize = grid.getDimensions().x();
+   const IndexType& ySize = grid.getDimensions().y();
+   const IndexType& zSize = grid.getDimensions().z();
+
+   for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
+      for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
+      {
+         coordinates.z() = 0;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+         coordinates.z() = zSize;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+      }
+
+   for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
+      for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
+      {
+         coordinates.y() = 0;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+         coordinates.y() = ySize;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+      }
+
+   for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
+      for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
+      {
+         coordinates.x() = 0;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+         coordinates.x() = xSize;
+         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
+      }
 }
 
 template< typename Real,
@@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid,
    /****
     * Traversing interior vertices
     */
+   CoordinatesType coordinates;
+   const IndexType& xSize = grid.getDimensions().x();
+   const IndexType& ySize = grid.getDimensions().y();
+   const IndexType& zSize = grid.getDimensions().z();
+
+#ifdef HAVE_OPENMP
+//#pragma omp parallel for
+#endif
+   for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ )
+      for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
+         for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
+         {
+            const IndexType index = grid.getVertexIndex( coordinates );
+            EntitiesProcessor::processVertex( grid, userData, index, coordinates );
+         }
 }
 
 
@@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda
    }
 }
 
+template< typename Real,
+          typename Index,
+          typename UserData,
+          typename EntitiesProcessor >
+__global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
+                                                    UserData* userData,
+                                                    const Index gridXIdx,
+                                                    const Index gridYIdx,
+                                                    const Index gridZIdx )
+{
+   typedef Real RealType;
+   typedef Index IndexType;
+   typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
+   typedef typename GridType::CoordinatesType CoordinatesType;
+
+   const IndexType& xSize = grid->getDimensions().x();
+   const IndexType& ySize = grid->getDimensions().y();
+   const IndexType& zSize = grid->getDimensions().z();
+
+   CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
+                                      ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
+                                      ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );
+
+   if( vertexCoordinates.x() < grid->getDimensions().x() &&
+       vertexCoordinates.y() < grid->getDimensions().y() &&
+       vertexCoordinates.z() < grid->getDimensions().z() )
+   {
+      if( grid->isBoundaryVertex( vertexCoordinates ) )
+      {
+         EntitiesProcessor::processVertex( *grid,
+                                           *userData,
+                                           grid->getVertexIndex( vertexCoordinates ),
+                                           vertexCoordinates );
+      }
+   }
+}
+
+template< typename Real,
+          typename Index,
+          typename UserData,
+          typename EntitiesProcessor >
+__global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
+                                                    UserData* userData,
+                                                    const Index gridXIdx,
+                                                    const Index gridYIdx,
+                                                    const Index gridZIdx )
+{
+   typedef Real RealType;
+   typedef Index IndexType;
+   typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
+   typedef typename GridType::CoordinatesType CoordinatesType;
+
+   const IndexType& xSize = grid->getDimensions().x();
+   const IndexType& ySize = grid->getDimensions().y();
+   const IndexType& zSize = grid->getDimensions().z();
+
+   CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
+                                      ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
+                                      ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );
+
+   if( vertexCoordinates.x() < grid->getDimensions().x() &&
+       vertexCoordinates.y() < grid->getDimensions().y() &&
+       vertexCoordinates.z() < grid->getDimensions().z() )
+   {
+      if( ! grid->isBoundaryVertex( vertexCoordinates ) )
+      {
+         EntitiesProcessor::processVertex( *grid,
+                                           *userData,
+                                           grid->getVertexIndex( vertexCoordinates ),
+                                           vertexCoordinates );
+      }
+   }
+}
+
+
 #endif
 
 template< typename Real,
@@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid,
    /****
     * Traversing boundary vertices
     */
+#ifdef HAVE_CUDA
+   GridType* kernelGrid = tnlCuda::passToDevice( grid );
+   UserData* kernelUserData = tnlCuda::passToDevice( userData );
+
+   dim3 cudaBlockSize( 8, 8, 4 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
+   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
+   cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
+   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
+   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
+   const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );
+
+   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
+         for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
+         {
+            tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
+                                              <<< cudaBlocks, cudaBlockSize >>>
+                                             ( kernelGrid,
+                                               kernelUserData,
+                                               gridXIdx,
+                                               gridYIdx,
+                                               gridZIdx );
+         }
+   cudaThreadSynchronize();
+   checkCudaDevice;
+#endif
+   
 }
 
 template< typename Real,
@@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid,
    /****
     * Traversing interior vertices
     */
+#ifdef HAVE_CUDA
+   GridType* kernelGrid = tnlCuda::passToDevice( grid );
+   UserData* kernelUserData = tnlCuda::passToDevice( userData );
+
+   dim3 cudaBlockSize( 8, 8, 4 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
+   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
+   cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
+   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
+   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
+   const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );
+
+   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
+         for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
+         {
+            tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
+                                              <<< cudaBlocks, cudaBlockSize >>>
+                                             ( kernelGrid,
+                                               kernelUserData,
+                                               gridXIdx,
+                                               gridYIdx,
+                                               gridZIdx );
+         }
+   cudaThreadSynchronize();
+   checkCudaDevice;
+#endif
+   
 }