Loading src/TNL/Meshes/GridDetails/GridTraverser_impl.h +21 −26 Original line number Diff line number Diff line Loading @@ -151,11 +151,9 @@ processEntities( CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); //GridEntity entity( gridPointer.template getData< tnlCuda >(), coordinates, *entityOrientation, *entityBasis ); Devices::Cuda::synchronizeDevice(); if( processOnlyBoundaryEntities ) { Loading Loading @@ -281,24 +279,23 @@ template< typename Real, typename EntitiesProcessor, bool processOnlyBoundaryEntities > __global__ void GridTraverser2D( GridEntity entity, /*const Meshes::Grid< 2, Real, tnlCuda, Index >* grid,*/ GridTraverser2D( const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid, UserData* userData, const typename GridEntity::CoordinatesType* begin, const typename GridEntity::CoordinatesType* end, /*const typename GridEntity::CoordinatesType* entityOrientation, const typename GridEntity::CoordinatesType* entityBasis,*/ const typename GridEntity::CoordinatesType* entityOrientation, const typename GridEntity::CoordinatesType* entityBasis, const Index gridXIdx, const Index gridYIdx ) { typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; entity.getCoordinates().x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; //GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); coordinates.x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; coordinates.y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); if( entity.getCoordinates().x() <= end->x() && entity.getCoordinates().y() <= end->y() ) Loading Loading @@ -337,15 +334,12 @@ processEntities( #ifdef HAVE_CUDA CoordinatesType* kernelBegin = Devices::Cuda::passToDevice( begin ); CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); //CoordinatesType* kernelEntityOrientation = tnlCuda::passToDevice( entityOrientation ); //CoordinatesType* kernelEntityBasis = tnlCuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); checkCudaDevice; CoordinatesType coordinates( 0 ); GridEntity entity( gridPointer.template getData< Devices::Cuda >(), coordinates, entityOrientation, entityBasis ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); Loading @@ -353,26 +347,27 @@ processEntities( const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); Devices::Cuda::synchronizeDevice(); for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities > <<< cudaBlocks, cudaBlockSize >>> ( entity, ( &gridPointer.template getData< Devices::Cuda >(), kernelUserData, kernelBegin, kernelEnd, kernelEntityOrientation, kernelEntityBasis, gridXIdx, gridYIdx ); cudaThreadSynchronize(); checkCudaDevice; //tnlCuda::freeFromDevice( kernelGrid ); //Devices::Cuda::freeFromDevice( kernelGrid ); Devices::Cuda::freeFromDevice( kernelBegin ); Devices::Cuda::freeFromDevice( kernelEnd ); //tnlCuda::freeFromDevice( kernelEntityOrientation ); //tnlCuda::freeFromDevice( kernelEntityBasis ); Devices::Cuda::freeFromDevice( kernelEntityOrientation ); Devices::Cuda::freeFromDevice( kernelEntityBasis ); Devices::Cuda::freeFromDevice( kernelUserData ); checkCudaDevice; #endif Loading Loading @@ -543,7 +538,7 @@ processEntities( CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( grid ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 8 ); Loading Loading
src/TNL/Meshes/GridDetails/GridTraverser_impl.h +21 −26 Original line number Diff line number Diff line Loading @@ -151,11 +151,9 @@ processEntities( CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); //GridEntity entity( gridPointer.template getData< tnlCuda >(), coordinates, *entityOrientation, *entityBasis ); Devices::Cuda::synchronizeDevice(); if( processOnlyBoundaryEntities ) { Loading Loading @@ -281,24 +279,23 @@ template< typename Real, typename EntitiesProcessor, bool processOnlyBoundaryEntities > __global__ void GridTraverser2D( GridEntity entity, /*const Meshes::Grid< 2, Real, tnlCuda, Index >* grid,*/ GridTraverser2D( const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid, UserData* userData, const typename GridEntity::CoordinatesType* begin, const typename GridEntity::CoordinatesType* end, /*const typename GridEntity::CoordinatesType* entityOrientation, const typename GridEntity::CoordinatesType* entityBasis,*/ const typename GridEntity::CoordinatesType* entityOrientation, const typename GridEntity::CoordinatesType* entityBasis, const Index gridXIdx, const Index gridYIdx ) { typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; entity.getCoordinates().x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; //GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); coordinates.x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; coordinates.y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); if( entity.getCoordinates().x() <= end->x() && entity.getCoordinates().y() <= end->y() ) Loading Loading @@ -337,15 +334,12 @@ processEntities( #ifdef HAVE_CUDA CoordinatesType* kernelBegin = Devices::Cuda::passToDevice( begin ); CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); //CoordinatesType* kernelEntityOrientation = tnlCuda::passToDevice( entityOrientation ); //CoordinatesType* kernelEntityBasis = tnlCuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); checkCudaDevice; CoordinatesType coordinates( 0 ); GridEntity entity( gridPointer.template getData< Devices::Cuda >(), coordinates, entityOrientation, entityBasis ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); Loading @@ -353,26 +347,27 @@ processEntities( const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); Devices::Cuda::synchronizeDevice(); for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities > <<< cudaBlocks, cudaBlockSize >>> ( entity, ( &gridPointer.template getData< Devices::Cuda >(), kernelUserData, kernelBegin, kernelEnd, kernelEntityOrientation, kernelEntityBasis, gridXIdx, gridYIdx ); cudaThreadSynchronize(); checkCudaDevice; //tnlCuda::freeFromDevice( kernelGrid ); //Devices::Cuda::freeFromDevice( kernelGrid ); Devices::Cuda::freeFromDevice( kernelBegin ); Devices::Cuda::freeFromDevice( kernelEnd ); //tnlCuda::freeFromDevice( kernelEntityOrientation ); //tnlCuda::freeFromDevice( kernelEntityBasis ); Devices::Cuda::freeFromDevice( kernelEntityOrientation ); Devices::Cuda::freeFromDevice( kernelEntityBasis ); Devices::Cuda::freeFromDevice( kernelUserData ); checkCudaDevice; #endif Loading Loading @@ -543,7 +538,7 @@ processEntities( CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end ); CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation ); CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis ); //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid ); //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( grid ); UserData* kernelUserData = Devices::Cuda::passToDevice( userData ); dim3 cudaBlockSize( 8, 8, 8 ); Loading