Commit 41091991 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Traversal of grid vertices and faces was added.

parent ef15d04e
Loading
Loading
Loading
Loading
+147 −3
Original line number Diff line number Diff line
@@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid,
   /****
    * Traversing boundary vertices
    */
   CoordinatesType coordinates;
   const IndexType& xSize = grid.getDimensions().x();
   const IndexType& ySize = grid.getDimensions().y();

   for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
   {
      coordinates.y() = 0;
      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
      coordinates.y() = ySize;
      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
   }
   for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ )
   {
      coordinates.x() = 0;
      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
      coordinates.x() = xSize;
      EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
   }
   
}

template< typename Real,
@@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid,
   /****
    * Traversing interior vertices
    */
}
   CoordinatesType coordinates;
   const IndexType& xSize = grid.getDimensions().x();
   const IndexType& ySize = grid.getDimensions().y();

#ifdef HAVE_OPENMP
//#pragma omp parallel for
#endif
   for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
      for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
      {
         const IndexType index = grid.getVertexIndex( coordinates );
         EntitiesProcessor::processVertex( grid, userData, index, coordinates );
      }  
}

/***
 *
@@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda
   }
}

/****
 * CUDA kernel calling EntitiesProcessor::processVertex for the boundary
 * vertices of a 2D grid.
 *
 * Every thread derives one pair of vertex coordinates from its block and
 * thread indices. gridXIdx and gridYIdx shift those coordinates by whole
 * multiples of tnlCuda::getMaxGridSize() blocks, so that a traversal split
 * into several launches covers the entire grid.
 *
 * grid      - grid to traverse; dereferenced inside the kernel
 * userData  - forwarded by reference to EntitiesProcessor::processVertex;
 *             dereferenced inside the kernel
 * gridXIdx  - index of the current launch along the x axis
 * gridYIdx  - index of the current launch along the y axis
 */
template< typename Real,
          typename Index,
          typename UserData,
          typename EntitiesProcessor >
__global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
                                                    UserData* userData,
                                                    const Index gridXIdx,
                                                    const Index gridYIdx )
{
   typedef Index IndexType;
   typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
   typedef typename GridType::CoordinatesType CoordinatesType;

   const IndexType xSize = grid->getDimensions().x();
   const IndexType ySize = grid->getDimensions().y();

   CoordinatesType vertex( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
                           ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );

   /****
    * Vertex coordinates run from 0 up to and including the grid size, so
    * only threads lying beyond that range have nothing to do.
    */
   if( vertex.x() > xSize || vertex.y() > ySize )
      return;

   /****
    * This kernel handles boundary vertices only.
    */
   if( ! grid->isBoundaryVertex( vertex ) )
      return;

   EntitiesProcessor::processVertex( *grid,
                                     *userData,
                                     grid->getVertexIndex( vertex ),
                                     vertex );
}

/****
 * CUDA kernel calling EntitiesProcessor::processVertex for the vertices of
 * a 2D grid that are not boundary vertices.
 *
 * Every thread derives one pair of vertex coordinates from its block and
 * thread indices. gridXIdx and gridYIdx shift those coordinates by whole
 * multiples of tnlCuda::getMaxGridSize() blocks, so that a traversal split
 * into several launches covers the entire grid.
 *
 * grid      - grid to traverse; dereferenced inside the kernel
 * userData  - forwarded by reference to EntitiesProcessor::processVertex;
 *             dereferenced inside the kernel
 * gridXIdx  - index of the current launch along the x axis
 * gridYIdx  - index of the current launch along the y axis
 */
template< typename Real,
          typename Index,
          typename UserData,
          typename EntitiesProcessor >
__global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
                                                    UserData* userData,
                                                    const Index gridXIdx,
                                                    const Index gridYIdx )
{
   typedef Index IndexType;
   typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
   typedef typename GridType::CoordinatesType CoordinatesType;

   const IndexType xSize = grid->getDimensions().x();
   const IndexType ySize = grid->getDimensions().y();

   CoordinatesType vertex( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
                           ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );

   /****
    * Threads whose coordinates fall outside the inclusive vertex range
    * 0 .. size have nothing to do.
    */
   if( vertex.x() > xSize || vertex.y() > ySize )
      return;

   /****
    * Boundary vertices are left to the boundary kernel.
    */
   if( grid->isBoundaryVertex( vertex ) )
      return;

   EntitiesProcessor::processVertex( *grid,
                                     *userData,
                                     grid->getVertexIndex( vertex ),
                                     vertex );
}


#endif
@@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
processBoundaryEntities( const GridType& grid,
                         UserData& userData ) const
{
#ifdef HAVE_CUDA
   /****
    * Traversing boundary vertices
    */
   GridType* kernelGrid = tnlCuda::passToDevice( grid );
   UserData* kernelUserData = tnlCuda::passToDevice( userData );

   dim3 cudaBlockSize( 16, 16 );
   dim3 cudaBlocks;
   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );

   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
      {
         tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
                                           <<< cudaBlocks, cudaBlockSize >>>
                                          ( kernelGrid,
                                            kernelUserData,
                                            gridXIdx,
                                            gridYIdx );
         checkCudaDevice;
      }
   cudaThreadSynchronize();   
#endif
}


@@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
processInteriorEntities( const GridType& grid,
                         UserData& userData ) const
{
#ifdef HAVE_CUDA
   /****
    * Traversing interior vertices    
    */
   GridType* kernelGrid = tnlCuda::passToDevice( grid );
   UserData* kernelUserData = tnlCuda::passToDevice( userData );

   dim3 cudaBlockSize( 16, 16 );
   dim3 cudaBlocks;
   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );

   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
      {
         tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
                                           <<< cudaBlocks, cudaBlockSize >>>
                                          ( kernelGrid,
                                            kernelUserData,
                                            gridXIdx,
                                            gridYIdx );
         checkCudaDevice;
      }
   cudaThreadSynchronize();   
#endif
}


+179 −0
Original line number Diff line number Diff line
@@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid,
   /****
    * Traversing boundary vertices
    */
   CoordinatesType coordinates;
   const IndexType& xSize = grid.getDimensions().x();
   const IndexType& ySize = grid.getDimensions().y();
   const IndexType& zSize = grid.getDimensions().z();

   for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
      for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
      {
         coordinates.z() = 0;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
         coordinates.z() = zSize;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
      }

   for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
      for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
      {
         coordinates.y() = 0;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
         coordinates.y() = ySize;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
      }

   for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
      for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
      {
         coordinates.x() = 0;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
         coordinates.x() = xSize;
         EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
      }
}

template< typename Real,
@@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid,
   /****
    * Traversing interior vertices
    */
   CoordinatesType coordinates;
   const IndexType& xSize = grid.getDimensions().x();
   const IndexType& ySize = grid.getDimensions().y();
   const IndexType& zSize = grid.getDimensions().z();

#ifdef HAVE_OPENMP
//#pragma omp parallel for
#endif
   for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ )
      for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
         for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
         {
            const IndexType index = grid.getVertexIndex( coordinates );
            EntitiesProcessor::processVertex( grid, userData, index, coordinates );
         }
}


@@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda
   }
}

/****
 * CUDA kernel calling EntitiesProcessor::processVertex for the boundary
 * vertices of a 3D grid.
 *
 * Every thread derives one triple of vertex coordinates from its block and
 * thread indices. gridXIdx, gridYIdx and gridZIdx shift those coordinates
 * by whole multiples of tnlCuda::getMaxGridSize() blocks, so that a
 * traversal split into several launches covers the entire grid.
 *
 * grid      - grid to traverse; dereferenced inside the kernel
 * userData  - forwarded by reference to EntitiesProcessor::processVertex;
 *             dereferenced inside the kernel
 * gridXIdx  - index of the current launch along the x axis
 * gridYIdx  - index of the current launch along the y axis
 * gridZIdx  - index of the current launch along the z axis
 */
template< typename Real,
          typename Index,
          typename UserData,
          typename EntitiesProcessor >
__global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
                                                    UserData* userData,
                                                    const Index gridXIdx,
                                                    const Index gridYIdx,
                                                    const Index gridZIdx )
{
   typedef Index IndexType;
   typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
   typedef typename GridType::CoordinatesType CoordinatesType;

   const IndexType xSize = grid->getDimensions().x();
   const IndexType ySize = grid->getDimensions().y();
   const IndexType zSize = grid->getDimensions().z();

   CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
                                      ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
                                      ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );

   /****
    * Vertex coordinates run from 0 up to and including the grid size in
    * each direction (there is one more vertex than cells along every axis),
    * hence the inclusive comparison. A strict '<' would skip every vertex
    * lying on the upper x, y and z faces of the grid, all of which are
    * boundary vertices.
    */
   if( vertexCoordinates.x() <= xSize &&
       vertexCoordinates.y() <= ySize &&
       vertexCoordinates.z() <= zSize )
   {
      if( grid->isBoundaryVertex( vertexCoordinates ) )
      {
         EntitiesProcessor::processVertex( *grid,
                                           *userData,
                                           grid->getVertexIndex( vertexCoordinates ),
                                           vertexCoordinates );
      }
   }
}

/****
 * CUDA kernel calling EntitiesProcessor::processVertex for the vertices of
 * a 3D grid that are not boundary vertices.
 *
 * Every thread derives one triple of vertex coordinates from its block and
 * thread indices. gridXIdx, gridYIdx and gridZIdx shift those coordinates
 * by whole multiples of tnlCuda::getMaxGridSize() blocks, so that a
 * traversal split into several launches covers the entire grid.
 *
 * grid      - grid to traverse; dereferenced inside the kernel
 * userData  - forwarded by reference to EntitiesProcessor::processVertex;
 *             dereferenced inside the kernel
 * gridXIdx  - index of the current launch along the x axis
 * gridYIdx  - index of the current launch along the y axis
 * gridZIdx  - index of the current launch along the z axis
 */
template< typename Real,
          typename Index,
          typename UserData,
          typename EntitiesProcessor >
__global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
                                                    UserData* userData,
                                                    const Index gridXIdx,
                                                    const Index gridYIdx,
                                                    const Index gridZIdx )
{
   typedef Index IndexType;
   typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
   typedef typename GridType::CoordinatesType CoordinatesType;

   const IndexType xSize = grid->getDimensions().x();
   const IndexType ySize = grid->getDimensions().y();
   const IndexType zSize = grid->getDimensions().z();

   CoordinatesType vertex( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
                           ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
                           ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );

   /****
    * Threads at or beyond the grid size in any direction are dropped.
    * NOTE(review): vertices with a coordinate equal to the grid size are
    * presumably reported as boundary vertices by isBoundaryVertex, in which
    * case this strict bound excludes no interior vertex - confirm.
    */
   if( vertex.x() >= xSize || vertex.y() >= ySize || vertex.z() >= zSize )
      return;

   /****
    * Boundary vertices are not processed by this kernel.
    */
   if( grid->isBoundaryVertex( vertex ) )
      return;

   EntitiesProcessor::processVertex( *grid,
                                     *userData,
                                     grid->getVertexIndex( vertex ),
                                     vertex );
}


#endif

template< typename Real,
@@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid,
   /****
    * Traversing boundary vertices
    */
#ifdef HAVE_CUDA
   GridType* kernelGrid = tnlCuda::passToDevice( grid );
   UserData* kernelUserData = tnlCuda::passToDevice( userData );

   dim3 cudaBlockSize( 8, 8, 4 );
   dim3 cudaBlocks;
   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
   cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
   const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );

   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
         for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
         {
            tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
                                              <<< cudaBlocks, cudaBlockSize >>>
                                             ( kernelGrid,
                                               kernelUserData,
                                               gridXIdx,
                                               gridYIdx,
                                               gridZIdx );
         }
   cudaThreadSynchronize();
   checkCudaDevice;
#endif
   
}

template< typename Real,
@@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid,
   /****
    * Traversing interior vertices
    */
#ifdef HAVE_CUDA
   GridType* kernelGrid = tnlCuda::passToDevice( grid );
   UserData* kernelUserData = tnlCuda::passToDevice( userData );

   dim3 cudaBlockSize( 8, 8, 4 );
   dim3 cudaBlocks;
   cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
   cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
   cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
   const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
   const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
   const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );

   for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
         for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
         {
            tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
                                              <<< cudaBlocks, cudaBlockSize >>>
                                             ( kernelGrid,
                                               kernelUserData,
                                               gridXIdx,
                                               gridYIdx,
                                               gridZIdx );
         }
   cudaThreadSynchronize();
   checkCudaDevice;
#endif
   
}