Loading src/mesh/tnlTraverser_Grid2D_impl.h +147 −3 Original line number Diff line number Diff line Loading @@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.y() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.y() = ySize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.x() = xSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } } template< typename Real, Loading @@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ } CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.getVertexIndex( coordinates ); EntitiesProcessor::processVertex( grid, userData, index, coordinates ); } } /*** * Loading Loading @@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( vertexCoordinates.x() <= grid->getDimensions().x() && vertexCoordinates.y() <= grid->getDimensions().y() ) { if( grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( vertexCoordinates.x() <= grid->getDimensions().x() && vertexCoordinates.y() <= grid->getDimensions().y() ) { if( ! grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } #endif Loading Loading @@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processBoundaryEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Boundary interior vertices * Traversing boundary vertices */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); checkCudaDevice; } cudaThreadSynchronize(); #endif } Loading @@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processInteriorEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Traversing interior vertices */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); checkCudaDevice; } cudaThreadSynchronize(); #endif } Loading src/mesh/tnlTraverser_Grid3D_impl.h +179 −0 Original line number Diff line number Diff line Loading @@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); const IndexType& zSize = grid.getDimensions().z(); for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.z() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.z() = zSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.y() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.y() = ySize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.x() = xSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } } template< typename Real, Loading @@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); const IndexType& zSize = grid.getDimensions().z(); #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ ) for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.getVertexIndex( coordinates ); EntitiesProcessor::processVertex( grid, userData, index, coordinates ); } } Loading Loading @@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx, const Index gridZIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); const IndexType& zSize = grid->getDimensions().z(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); if( vertexCoordinates.x() < grid->getDimensions().x() && vertexCoordinates.y() < grid->getDimensions().y() && vertexCoordinates.z() < grid->getDimensions().z() ) { if( grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx, const Index gridZIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); const IndexType& zSize = grid->getDimensions().z(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); if( vertexCoordinates.x() < grid->getDimensions().x() && vertexCoordinates.y() < grid->getDimensions().y() && vertexCoordinates.z() < grid->getDimensions().z() ) { if( ! grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } #endif template< typename Real, Loading Loading @@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ #ifdef HAVE_CUDA GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 4 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) { tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx, gridZIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading @@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ #ifdef HAVE_CUDA GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 4 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) { tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx, gridZIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } Loading Loading
src/mesh/tnlTraverser_Grid2D_impl.h +147 −3 Original line number Diff line number Diff line Loading @@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.y() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.y() = ySize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.x() = xSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } } template< typename Real, Loading @@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ } CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.getVertexIndex( coordinates ); EntitiesProcessor::processVertex( grid, userData, index, coordinates ); } } /*** * Loading Loading @@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( vertexCoordinates.x() <= grid->getDimensions().x() && vertexCoordinates.y() <= grid->getDimensions().y() ) { if( grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( vertexCoordinates.x() <= grid->getDimensions().x() && vertexCoordinates.y() <= grid->getDimensions().y() ) { if( ! grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } #endif Loading Loading @@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processBoundaryEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Boundary interior vertices * Traversing boundary vertices */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); checkCudaDevice; } cudaThreadSynchronize(); #endif } Loading @@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >:: processInteriorEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Traversing interior vertices */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); checkCudaDevice; } cudaThreadSynchronize(); #endif } Loading
src/mesh/tnlTraverser_Grid3D_impl.h +179 −0 Original line number Diff line number Diff line Loading @@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); const IndexType& zSize = grid.getDimensions().z(); for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.z() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.z() = zSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ ) { coordinates.y() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.y() = ySize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ ) for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); coordinates.x() = xSize; EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates ); } } template< typename Real, Loading @@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ CoordinatesType coordinates; const IndexType& xSize = grid.getDimensions().x(); const IndexType& ySize = grid.getDimensions().y(); const IndexType& zSize = grid.getDimensions().z(); #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ ) for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.getVertexIndex( coordinates ); EntitiesProcessor::processVertex( grid, userData, index, coordinates ); } } Loading Loading @@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx, const Index gridZIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); const IndexType& zSize = grid->getDimensions().z(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); if( vertexCoordinates.x() < grid->getDimensions().x() && vertexCoordinates.y() < grid->getDimensions().y() && vertexCoordinates.z() < grid->getDimensions().z() ) { if( grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor > __global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx, const Index gridZIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 3, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); const IndexType& zSize = grid->getDimensions().z(); CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y, ( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z ); if( vertexCoordinates.x() < grid->getDimensions().x() && vertexCoordinates.y() < grid->getDimensions().y() && vertexCoordinates.z() < grid->getDimensions().z() ) { if( ! grid->isBoundaryVertex( vertexCoordinates ) ) { EntitiesProcessor::processVertex( *grid, *userData, grid->getVertexIndex( vertexCoordinates ), vertexCoordinates ); } } } #endif template< typename Real, Loading Loading @@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid, /**** * Traversing boundary vertices */ #ifdef HAVE_CUDA GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 4 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) { tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx, gridZIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading @@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior vertices */ #ifdef HAVE_CUDA GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 4 ); dim3 cudaBlocks; cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z ); const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) { tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx, gridZIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } Loading