Commit 5c6b42f6 authored by Jakub Klinkovský
Browse files

Fixed 2D grid traverser for CUDA

parent 20b876ed
Loading
Loading
Loading
Loading
+21 −26
Original line number Diff line number Diff line
@@ -151,11 +151,9 @@ processEntities(
   CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end );
   CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation );
   CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis );
   //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer );
   //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer );
   UserData* kernelUserData = Devices::Cuda::passToDevice( userData );

   //GridEntity entity( gridPointer.template getData< tnlCuda >(), coordinates, *entityOrientation, *entityBasis );
   
   Devices::Cuda::synchronizeDevice();
   if( processOnlyBoundaryEntities )
   {
@@ -281,24 +279,23 @@ template< typename Real,
          typename EntitiesProcessor,
          bool processOnlyBoundaryEntities >
__global__ void 
GridTraverser2D( GridEntity entity,
   /*const Meshes::Grid< 2, Real, tnlCuda, Index >* grid,*/
GridTraverser2D(
   const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid,
   UserData* userData,
   const typename GridEntity::CoordinatesType* begin,
   const typename GridEntity::CoordinatesType* end,
   /*const typename GridEntity::CoordinatesType* entityOrientation,
   const typename GridEntity::CoordinatesType* entityBasis,*/
   const typename GridEntity::CoordinatesType* entityOrientation,
   const typename GridEntity::CoordinatesType* entityBasis,
   const Index gridXIdx,
   const Index gridYIdx )
{
   typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
   typename GridType::CoordinatesType coordinates;

   entity.getCoordinates().x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   entity.getCoordinates().y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;  
   
   //GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis );
   coordinates.x() = begin->x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   coordinates.y() = begin->y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;  
   
   GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis );

   if( entity.getCoordinates().x() <= end->x() &&
       entity.getCoordinates().y() <= end->y() )
@@ -337,15 +334,12 @@ processEntities(
#ifdef HAVE_CUDA   
   CoordinatesType* kernelBegin = Devices::Cuda::passToDevice( begin );
   CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end );
   //CoordinatesType* kernelEntityOrientation = tnlCuda::passToDevice( entityOrientation );
   //CoordinatesType* kernelEntityBasis = tnlCuda::passToDevice( entityBasis );
   //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( *gridPointer );
   CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation );
   CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis );
   //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( *gridPointer );
   UserData* kernelUserData = Devices::Cuda::passToDevice( userData );
   checkCudaDevice;   

   CoordinatesType coordinates( 0 );
   GridEntity entity( gridPointer.template getData< Devices::Cuda >(), coordinates, entityOrientation, entityBasis );
      
   dim3 cudaBlockSize( 16, 16 );
   dim3 cudaBlocks;
   cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
@@ -353,26 +347,27 @@ processEntities(
   const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
   const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y );

   
   Devices::Cuda::synchronizeDevice();
   for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
      for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
         GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities >
            <<< cudaBlocks, cudaBlockSize >>>
            ( entity, 
            ( &gridPointer.template getData< Devices::Cuda >(),
              kernelUserData,
              kernelBegin,
              kernelEnd,
              kernelEntityOrientation,
              kernelEntityBasis,
              gridXIdx,
              gridYIdx );
 
   cudaThreadSynchronize();
   checkCudaDevice;
   //tnlCuda::freeFromDevice( kernelGrid );
   //Devices::Cuda::freeFromDevice( kernelGrid );
   Devices::Cuda::freeFromDevice( kernelBegin );
   Devices::Cuda::freeFromDevice( kernelEnd );
   //tnlCuda::freeFromDevice( kernelEntityOrientation );
   //tnlCuda::freeFromDevice( kernelEntityBasis );
   Devices::Cuda::freeFromDevice( kernelEntityOrientation );
   Devices::Cuda::freeFromDevice( kernelEntityBasis );
   Devices::Cuda::freeFromDevice( kernelUserData );
   checkCudaDevice;
#endif
@@ -543,7 +538,7 @@ processEntities(
   CoordinatesType* kernelEnd = Devices::Cuda::passToDevice( end );
   CoordinatesType* kernelEntityOrientation = Devices::Cuda::passToDevice( entityOrientation );
   CoordinatesType* kernelEntityBasis = Devices::Cuda::passToDevice( entityBasis );
   //typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid );
   //typename GridEntity::MeshType* kernelGrid = Devices::Cuda::passToDevice( grid );
   UserData* kernelUserData = Devices::Cuda::passToDevice( userData );
      
   dim3 cudaBlockSize( 8, 8, 8 );