diff --git a/src/mesh/tnlTraverser_Grid2D_impl.h b/src/mesh/tnlTraverser_Grid2D_impl.h index 228aabca482f4850ae8c8bbd621859b23a538038..4d8efa2df82898134c560b3c9458f6d71b313dfe 100644 --- a/src/mesh/tnlTraverser_Grid2D_impl.h +++ b/src/mesh/tnlTraverser_Grid2D_impl.h @@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ + CoordinatesType coordinates; + const IndexType& xSize = grid.getDimensions().x(); + const IndexType& ySize = grid.getDimensions().y(); + + for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) + { + coordinates.y() = 0; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + coordinates.y() = ySize; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + } + for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ ) + { + coordinates.x() = 0; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + coordinates.x() = xSize; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + } + } template< typename Real, @@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ -} + CoordinatesType coordinates; + const IndexType& xSize = grid.getDimensions().x(); + const IndexType& ySize = grid.getDimensions().y(); +#ifdef HAVE_OPENMP +//#pragma omp parallel for +#endif + for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) + for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) + { + const IndexType index = grid.getVertexIndex( coordinates ); + EntitiesProcessor::processVertex( grid, userData, index, coordinates ); + } +} /*** * @@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda } } +template< typename Real, + typename Index, + typename UserData, + typename EntitiesProcessor > +__global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, + UserData* userData, + const Index gridXIdx, + const Index gridYIdx ) +{ + typedef Real RealType; + typedef Index IndexType; + typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; + typedef typename GridType::CoordinatesType CoordinatesType; + + const IndexType& xSize = grid->getDimensions().x(); + const IndexType& ySize = grid->getDimensions().y(); + + CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); + + if( vertexCoordinates.x() <= grid->getDimensions().x() && + vertexCoordinates.y() <= grid->getDimensions().y() ) + { + if( grid->isBoundaryVertex( vertexCoordinates ) ) + { + EntitiesProcessor::processVertex( *grid, + *userData, + grid->getVertexIndex( vertexCoordinates ), + vertexCoordinates ); + } + } +} + +template< typename Real, + typename Index, + typename UserData, + typename EntitiesProcessor > +__global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, + UserData* userData, + const Index gridXIdx, + const Index gridYIdx ) +{ + typedef Real RealType; + typedef Index IndexType; + typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; + typedef typename GridType::CoordinatesType CoordinatesType; + + const IndexType& xSize = grid->getDimensions().x(); + const IndexType& ySize = grid->getDimensions().y(); + + CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); + + if( vertexCoordinates.x() <= grid->getDimensions().x() && + vertexCoordinates.y() <= grid->getDimensions().y() ) + { + if( ! grid->isBoundaryVertex( vertexCoordinates ) ) + { + EntitiesProcessor::processVertex( *grid, + *userData, + grid->getVertexIndex( vertexCoordinates ), + vertexCoordinates ); + } + } +} #endif @@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processBoundaryEntities( const GridType& grid, UserData& userData ) const { +#ifdef HAVE_CUDA /**** - * Boundary interior vertices + * Traversing boundary vertices */ + GridType* kernelGrid = tnlCuda::passToDevice( grid ); + UserData* kernelUserData = tnlCuda::passToDevice( userData ); + + dim3 cudaBlockSize( 16, 16 ); + dim3 cudaBlocks; + cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); + cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); + const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); + + for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) + for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) + { + tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + gridXIdx, + gridYIdx ); + checkCudaDevice; + } + cudaThreadSynchronize(); +#endif } @@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processInteriorEntities( const GridType& grid, UserData& userData ) const { +#ifdef HAVE_CUDA /**** - * Traversing interior vertices + * Traversing interior vertices */ + GridType* kernelGrid = tnlCuda::passToDevice( grid ); + UserData* kernelUserData = tnlCuda::passToDevice( userData ); + + dim3 cudaBlockSize( 16, 16 ); + dim3 cudaBlocks; + cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); + cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); + const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); + + for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) + for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) + { + tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + gridXIdx, + gridYIdx ); + checkCudaDevice; + } + cudaThreadSynchronize(); +#endif } diff --git a/src/mesh/tnlTraverser_Grid3D_impl.h b/src/mesh/tnlTraverser_Grid3D_impl.h index 9cd074ca5fbad055e2e0f5c6420df935b767835b..739e12cd5e58f3e459911f118af61fb421e76cca 100644 --- a/src/mesh/tnlTraverser_Grid3D_impl.h +++ b/src/mesh/tnlTraverser_Grid3D_impl.h @@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ + CoordinatesType coordinates; + const IndexType& xSize = grid.getDimensions().x(); + const IndexType& ySize = grid.getDimensions().y(); + const IndexType& zSize = grid.getDimensions().z(); + + for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) + for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) + { + coordinates.z() = 0; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + coordinates.z() = zSize; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + } + + for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) + for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) + { + coordinates.y() = 0; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + coordinates.y() = ySize; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + } + + for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) + for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) + { + coordinates.x() = 0; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + coordinates.x() = xSize; + EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); + } } template< typename Real, @@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ + CoordinatesType coordinates; + const IndexType& xSize = grid.getDimensions().x(); + const IndexType& ySize = grid.getDimensions().y(); + const IndexType& zSize = grid.getDimensions().z(); + +#ifdef HAVE_OPENMP +//#pragma omp parallel for +#endif + for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ ) + for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) + for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) + { + const IndexType index = grid.getVertexIndex( coordinates ); + EntitiesProcessor::processVertex( grid, userData, index, coordinates ); + } } @@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda } } +template< typename Real, + typename Index, + typename UserData, + typename EntitiesProcessor > +__global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, + UserData* userData, + const Index gridXIdx, + const Index gridYIdx, + const Index gridZIdx ) +{ + typedef Real RealType; + typedef Index IndexType; + typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; + typedef typename GridType::CoordinatesType CoordinatesType; + + const IndexType& xSize = grid->getDimensions().x(); + const IndexType& ySize = grid->getDimensions().y(); + const IndexType& zSize = grid->getDimensions().z(); + + CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, + ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); + + if( vertexCoordinates.x() < grid->getDimensions().x() && + vertexCoordinates.y() < grid->getDimensions().y() && + vertexCoordinates.z() < grid->getDimensions().z() ) + { + if( grid->isBoundaryVertex( vertexCoordinates ) ) + { + EntitiesProcessor::processVertex( *grid, + *userData, + grid->getVertexIndex( vertexCoordinates ), + vertexCoordinates ); + } + } +} + +template< typename Real, + typename Index, + typename UserData, + typename EntitiesProcessor > +__global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, + UserData* userData, + const Index gridXIdx, + const Index gridYIdx, + const Index gridZIdx ) +{ + typedef Real RealType; + typedef Index IndexType; + typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; + typedef typename GridType::CoordinatesType CoordinatesType; + + const IndexType& xSize = grid->getDimensions().x(); + const IndexType& ySize = grid->getDimensions().y(); + const IndexType& zSize = grid->getDimensions().z(); + + CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, + ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, + ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); + + if( vertexCoordinates.x() < grid->getDimensions().x() && + vertexCoordinates.y() < grid->getDimensions().y() && + vertexCoordinates.z() < grid->getDimensions().z() ) + { + if( ! grid->isBoundaryVertex( vertexCoordinates ) ) + { + EntitiesProcessor::processVertex( *grid, + *userData, + grid->getVertexIndex( vertexCoordinates ), + vertexCoordinates ); + } + } +} + + #endif template< typename Real, @@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ +#ifdef HAVE_CUDA + GridType* kernelGrid = tnlCuda::passToDevice( grid ); + UserData* kernelUserData = tnlCuda::passToDevice( userData ); + + dim3 cudaBlockSize( 8, 8, 4 ); + dim3 cudaBlocks; + cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); + cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); + cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); + const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); + const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); + + for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) + for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) + for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) + { + tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + gridXIdx, + gridYIdx, + gridZIdx ); + } + cudaThreadSynchronize(); + checkCudaDevice; +#endif + } template< typename Real, @@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ +#ifdef HAVE_CUDA + GridType* kernelGrid = tnlCuda::passToDevice( grid ); + UserData* kernelUserData = tnlCuda::passToDevice( userData ); + + dim3 cudaBlockSize( 8, 8, 4 ); + dim3 cudaBlocks; + cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); + cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); + cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); + const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); + const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); + + for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) + for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) + for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) + { + tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + gridXIdx, + gridYIdx, + gridZIdx ); + } + cudaThreadSynchronize(); + checkCudaDevice; +#endif + }