Loading src/mesh/tnlTraverser_Grid2D_impl.h +190 −4 Original line number Diff line number Diff line Loading @@ -98,15 +98,19 @@ processBoundaryEntities( const GridType& grid, { coordinates.y() = 0; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 0, 1 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; coordinates.y() = ySize; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 0, 1 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; } for( coordinates.y() = 0; coordinates.y() < ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 1, 0 >( coordinates ), coordinates ); coordinates.x() = ySize; //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; coordinates.x() = xSize; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 1, 0 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; } } Loading @@ -130,17 +134,23 @@ processInteriorEntities( const GridType& grid, #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.y() = 1; coordinates.y() < ySize - 1; coordinates.y() ++ ) //cout << "< 1, 0 >" << endl; for( coordinates.y() = 0; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.template getFaceIndex< 1, 0 >( coordinates ); EntitiesProcessor::processFace( grid, userData, index, coordinates ); //cout << "Interior face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; } //cout << "< 0, 1 >" << endl; for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize - 1; coordinates.x() ++ ) for( coordinates.x() = 0; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.template getFaceIndex< 0, 1 >( coordinates ); EntitiesProcessor::processFace( grid, userData, index, coordinates ); //cout << "Interior face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; } } Loading Loading @@ -249,6 +259,80 @@ __global__ void tnlTraverserGrid2DInteriorCells( const tnlGrid< 2, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor, int nx, int ny > __global__ void tnlTraverserGrid2DBoundaryFaces( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType faceCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( faceCoordinates.x() < grid->getDimensions().x() + nx && faceCoordinates.y() < grid->getDimensions().y() + ny ) { if( grid->isBoundaryFace( faceCoordinates ) ) { //printf( "Processing boundary conditions at %d %d \n", cellCoordinates.x(), cellCoordinates.y() ); EntitiesProcessor::processFace( *grid, *userData, grid->template getFaceIndex< nx, ny >( cellCoordinates ), cellCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor, int nx, int ny > __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType cellCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( cellCoordinates.x() < grid->getDimensions().x() + nx && cellCoordinates.y() < grid->getDimensions().y() + ny ) { if( ! grid->isBoundaryFace( cellCoordinates ) ) { //printf( "Processing interior conditions at %d %d \n", cellCoordinates.x(), cellCoordinates.y() ); EntitiesProcessor::processFace( *grid, *userData, grid->template getFaceIndex< nx, ny >( faceCoordinates ), cellCoordinates ); } } } #endif template< typename Real, Loading Loading @@ -338,9 +422,59 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 1 >:: processBoundaryEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Traversing boundary faces * Boundary conditions */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; IndexType cudaXGrids, cudaYGrids; /**** * < 1, 0 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y(), cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryFaces< Real, Index, UserData, EntitiesProcessor, 1, 0 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; /**** * < 0, 1 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x(), cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryFaces< Real, Index, UserData, EntitiesProcessor, 0, 1 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading @@ -355,6 +489,58 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior faces */ #ifdef HAVE_CUDA /**** * Boundary conditions */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; IndexType cudaXGrids, cudaYGrids; /**** * < 1, 0 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y(), cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorFaces< Real, Index, UserData, EntitiesProcessor, 1, 0 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; /**** * < 0, 1 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x(), cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorFaces< Real, Index, UserData, EntitiesProcessor, 0, 1 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading src/solvers/pde/tnlLinearSystemAssembler.h +1 −1 Original line number Diff line number Diff line Loading @@ -292,7 +292,7 @@ class tnlLinearSystemAssembler< tnlGrid< Dimensions, Real, Device, Index >, const IndexType index, const CoordinatesType& coordinates ) { printf( "index = %d \n", index ); //printf( "index = %d \n", index ); typedef tnlFunctionAdapter< MeshType, RightHandSide > FunctionAdapter; ( *userData.b )[ index ] = ( *userData.u )[ index ] + ( *userData.tau ) * FunctionAdapter::getValue( mesh, Loading Loading
src/mesh/tnlTraverser_Grid2D_impl.h +190 −4 Original line number Diff line number Diff line Loading @@ -98,15 +98,19 @@ processBoundaryEntities( const GridType& grid, { coordinates.y() = 0; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 0, 1 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; coordinates.y() = ySize; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 0, 1 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; } for( coordinates.y() = 0; coordinates.y() < ySize; coordinates.y() ++ ) { coordinates.x() = 0; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 1, 0 >( coordinates ), coordinates ); coordinates.x() = ySize; //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; coordinates.x() = xSize; EntitiesProcessor::processFace( grid, userData, grid.template getFaceIndex< 1, 0 >( coordinates ), coordinates ); //cout << "Boundary face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; } } Loading @@ -130,17 +134,23 @@ processInteriorEntities( const GridType& grid, #ifdef HAVE_OPENMP //#pragma omp parallel for #endif for( coordinates.y() = 1; coordinates.y() < ySize - 1; coordinates.y() ++ ) //cout << "< 1, 0 >" << endl; for( coordinates.y() = 0; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.template getFaceIndex< 1, 0 >( coordinates ); EntitiesProcessor::processFace( grid, userData, index, coordinates ); //cout << "Interior face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 1, 0 >( coordinates ) << endl; } //cout << "< 0, 1 >" << endl; for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ ) for( coordinates.x() = 1; coordinates.x() < xSize - 1; coordinates.x() ++ ) for( coordinates.x() = 0; coordinates.x() < xSize; coordinates.x() ++ ) { const IndexType index = grid.template getFaceIndex< 0, 1 >( coordinates ); EntitiesProcessor::processFace( grid, userData, index, coordinates ); //cout << "Interior face coordinates = " << coordinates << " index = " << grid.template getFaceIndex< 0, 1 >( coordinates ) << endl; } } Loading Loading @@ -249,6 +259,80 @@ __global__ void tnlTraverserGrid2DInteriorCells( const tnlGrid< 2, Real, tnlCuda } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor, int nx, int ny > __global__ void tnlTraverserGrid2DBoundaryFaces( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType faceCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( faceCoordinates.x() < grid->getDimensions().x() + nx && faceCoordinates.y() < grid->getDimensions().y() + ny ) { if( grid->isBoundaryFace( faceCoordinates ) ) { //printf( "Processing boundary conditions at %d %d \n", cellCoordinates.x(), cellCoordinates.y() ); EntitiesProcessor::processFace( *grid, *userData, grid->template getFaceIndex< nx, ny >( cellCoordinates ), cellCoordinates ); } } } template< typename Real, typename Index, typename UserData, typename EntitiesProcessor, int nx, int ny > __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda, Index >* grid, UserData* userData, const Index gridXIdx, const Index gridYIdx ) { typedef Real RealType; typedef Index IndexType; typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::CoordinatesType CoordinatesType; const IndexType& xSize = grid->getDimensions().x(); const IndexType& ySize = grid->getDimensions().y(); CoordinatesType cellCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x, ( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y ); if( cellCoordinates.x() < grid->getDimensions().x() + nx && cellCoordinates.y() < grid->getDimensions().y() + ny ) { if( ! grid->isBoundaryFace( cellCoordinates ) ) { //printf( "Processing interior conditions at %d %d \n", cellCoordinates.x(), cellCoordinates.y() ); EntitiesProcessor::processFace( *grid, *userData, grid->template getFaceIndex< nx, ny >( faceCoordinates ), cellCoordinates ); } } } #endif template< typename Real, Loading Loading @@ -338,9 +422,59 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 1 >:: processBoundaryEntities( const GridType& grid, UserData& userData ) const { #ifdef HAVE_CUDA /**** * Traversing boundary faces * Boundary conditions */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; IndexType cudaXGrids, cudaYGrids; /**** * < 1, 0 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y(), cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryFaces< Real, Index, UserData, EntitiesProcessor, 1, 0 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; /**** * < 0, 1 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x(), cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DBoundaryFaces< Real, Index, UserData, EntitiesProcessor, 0, 1 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading @@ -355,6 +489,58 @@ processInteriorEntities( const GridType& grid, /**** * Traversing interior faces */ #ifdef HAVE_CUDA /**** * Boundary conditions */ GridType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; IndexType cudaXGrids, cudaYGrids; /**** * < 1, 0 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y(), cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorFaces< Real, Index, UserData, EntitiesProcessor, 1, 0 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; /**** * < 0, 1 > faces */ cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x(), cudaBlockSize.x ); cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y ); cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) { tnlTraverserGrid2DInteriorFaces< Real, Index, UserData, EntitiesProcessor, 0, 1 > <<< cudaBlocks, cudaBlockSize >>> ( kernelGrid, kernelUserData, gridXIdx, gridYIdx ); } cudaThreadSynchronize(); checkCudaDevice; #endif } template< typename Real, Loading
src/solvers/pde/tnlLinearSystemAssembler.h +1 −1 Original line number Diff line number Diff line Loading @@ -292,7 +292,7 @@ class tnlLinearSystemAssembler< tnlGrid< Dimensions, Real, Device, Index >, const IndexType index, const CoordinatesType& coordinates ) { printf( "index = %d \n", index ); //printf( "index = %d \n", index ); typedef tnlFunctionAdapter< MeshType, RightHandSide > FunctionAdapter; ( *userData.b )[ index ] = ( *userData.u )[ index ] + ( *userData.tau ) * FunctionAdapter::getValue( mesh, Loading