Traversing of grid vertices and faces was added. (41091991) · Commits · TNL / tnl-dev

src/mesh/tnlTraverser_Grid2D_impl.h

+147 −3

Original line number	Diff line number	Diff line
		@@ -167,6 +167,25 @@ processBoundaryEntities( const GridType& grid,
		/****
		* Traversing boundary vertices
		*/
		CoordinatesType coordinates;
		const IndexType& xSize = grid.getDimensions().x();
		const IndexType& ySize = grid.getDimensions().y();

		for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
		{
		coordinates.y() = 0;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		coordinates.y() = ySize;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		}
		for( coordinates.y() = 1; coordinates.y() <= ySize; coordinates.y() ++ )
		{
		coordinates.x() = 0;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		coordinates.x() = xSize;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		}

		}

		template< typename Real,
		@@ -181,8 +200,20 @@ processInteriorEntities( const GridType& grid,
		/****
		* Traversing interior vertices
		*/
		}
		CoordinatesType coordinates;
		const IndexType& xSize = grid.getDimensions().x();
		const IndexType& ySize = grid.getDimensions().y();

		#ifdef HAVE_OPENMP
		//#pragma omp parallel for
		#endif
		for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
		for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
		{
		const IndexType index = grid.getVertexIndex( coordinates );
		EntitiesProcessor::processVertex( grid, userData, index, coordinates );
		}
		}

		/***
		*
		@@ -331,6 +362,71 @@ __global__ void tnlTraverserGrid2DInteriorFaces( const tnlGrid< 2, Real, tnlCuda
		}
		}

		template< typename Real,
		typename Index,
		typename UserData,
		typename EntitiesProcessor >
		__global__ void tnlTraverserGrid2DBoundaryVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
		UserData* userData,
		const Index gridXIdx,
		const Index gridYIdx )
		{
		typedef Real RealType;
		typedef Index IndexType;
		typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
		typedef typename GridType::CoordinatesType CoordinatesType;

		const IndexType& xSize = grid->getDimensions().x();
		const IndexType& ySize = grid->getDimensions().y();

		CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
		( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );

		if( vertexCoordinates.x() <= grid->getDimensions().x() &&
		vertexCoordinates.y() <= grid->getDimensions().y() )
		{
		if( grid->isBoundaryVertex( vertexCoordinates ) )
		{
		EntitiesProcessor::processVertex( *grid,
		*userData,
		grid->getVertexIndex( vertexCoordinates ),
		vertexCoordinates );
		}
		}
		}

		template< typename Real,
		typename Index,
		typename UserData,
		typename EntitiesProcessor >
		__global__ void tnlTraverserGrid2DInteriorVertices( const tnlGrid< 2, Real, tnlCuda, Index >* grid,
		UserData* userData,
		const Index gridXIdx,
		const Index gridYIdx )
		{
		typedef Real RealType;
		typedef Index IndexType;
		typedef tnlGrid< 2, Real, tnlCuda, Index > GridType;
		typedef typename GridType::CoordinatesType CoordinatesType;

		const IndexType& xSize = grid->getDimensions().x();
		const IndexType& ySize = grid->getDimensions().y();

		CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
		( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y );

		if( vertexCoordinates.x() <= grid->getDimensions().x() &&
		vertexCoordinates.y() <= grid->getDimensions().y() )
		{
		if( ! grid->isBoundaryVertex( vertexCoordinates ) )
		{
		EntitiesProcessor::processVertex( *grid,
		*userData,
		grid->getVertexIndex( vertexCoordinates ),
		vertexCoordinates );
		}
		}
		}


		#endif
		@@ -555,9 +651,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
		processBoundaryEntities( const GridType& grid,
		UserData& userData ) const
		{
		#ifdef HAVE_CUDA
		/****
		* Boundary interior vertices
		* Traversing boundary vertices
		*/
		GridType* kernelGrid = tnlCuda::passToDevice( grid );
		UserData* kernelUserData = tnlCuda::passToDevice( userData );

		dim3 cudaBlockSize( 16, 16 );
		dim3 cudaBlocks;
		cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
		cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
		const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
		const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );

		for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
		for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
		{
		tnlTraverserGrid2DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
		<<< cudaBlocks, cudaBlockSize >>>
		( kernelGrid,
		kernelUserData,
		gridXIdx,
		gridYIdx );
		checkCudaDevice;
		}
		cudaThreadSynchronize();
		#endif
		}


		@@ -570,9 +690,33 @@ tnlTraverser< tnlGrid< 2, Real, tnlCuda, Index >, 0 >::
		processInteriorEntities( const GridType& grid,
		UserData& userData ) const
		{
		#ifdef HAVE_CUDA
		/****
		* Traversing interior vertices
		*/
		GridType* kernelGrid = tnlCuda::passToDevice( grid );
		UserData* kernelUserData = tnlCuda::passToDevice( userData );

		dim3 cudaBlockSize( 16, 16 );
		dim3 cudaBlocks;
		cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
		cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
		const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
		const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );

		for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
		for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
		{
		tnlTraverserGrid2DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
		<<< cudaBlocks, cudaBlockSize >>>
		( kernelGrid,
		kernelUserData,
		gridXIdx,
		gridYIdx );
		checkCudaDevice;
		}
		cudaThreadSynchronize();
		#endif
		}

src/mesh/tnlTraverser_Grid3D_impl.h

+179 −0

Original line number	Diff line number	Diff line
		@@ -161,6 +161,37 @@ processBoundaryEntities( const GridType& grid,
		/****
		* Traversing boundary vertices
		*/
		CoordinatesType coordinates;
		const IndexType& xSize = grid.getDimensions().x();
		const IndexType& ySize = grid.getDimensions().y();
		const IndexType& zSize = grid.getDimensions().z();

		for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
		for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
		{
		coordinates.z() = 0;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		coordinates.z() = zSize;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		}

		for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
		for( coordinates.x() = 0; coordinates.x() <= xSize; coordinates.x() ++ )
		{
		coordinates.y() = 0;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		coordinates.y() = ySize;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		}

		for( coordinates.z() = 0; coordinates.z() <= zSize; coordinates.z() ++ )
		for( coordinates.y() = 0; coordinates.y() <= ySize; coordinates.y() ++ )
		{
		coordinates.x() = 0;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		coordinates.x() = xSize;
		EntitiesProcessor::processVertex( grid, userData, grid.getVertexIndex( coordinates ), coordinates );
		}
		}

		template< typename Real,
		@@ -175,6 +206,21 @@ processInteriorEntities( const GridType& grid,
		/****
		* Traversing interior vertices
		*/
		CoordinatesType coordinates;
		const IndexType& xSize = grid.getDimensions().x();
		const IndexType& ySize = grid.getDimensions().y();
		const IndexType& zSize = grid.getDimensions().z();

		#ifdef HAVE_OPENMP
		//#pragma omp parallel for
		#endif
		for( coordinates.z() = 1; coordinates.z() < zSize; coordinates.z() ++ )
		for( coordinates.y() = 1; coordinates.y() < ySize; coordinates.y() ++ )
		for( coordinates.x() = 1; coordinates.x() < xSize; coordinates.x() ++ )
		{
		const IndexType index = grid.getVertexIndex( coordinates );
		EntitiesProcessor::processVertex( grid, userData, index, coordinates );
		}
		}


		@@ -260,6 +306,81 @@ __global__ void tnlTraverserGrid3DInteriorCells( const tnlGrid< 3, Real, tnlCuda
		}
		}

		template< typename Real,
		typename Index,
		typename UserData,
		typename EntitiesProcessor >
		__global__ void tnlTraverserGrid3DBoundaryVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
		UserData* userData,
		const Index gridXIdx,
		const Index gridYIdx,
		const Index gridZIdx )
		{
		typedef Real RealType;
		typedef Index IndexType;
		typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
		typedef typename GridType::CoordinatesType CoordinatesType;

		const IndexType& xSize = grid->getDimensions().x();
		const IndexType& ySize = grid->getDimensions().y();
		const IndexType& zSize = grid->getDimensions().z();

		CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
		( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
		( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );

		if( vertexCoordinates.x() < grid->getDimensions().x() &&
		vertexCoordinates.y() < grid->getDimensions().y() &&
		vertexCoordinates.z() < grid->getDimensions().z() )
		{
		if( grid->isBoundaryVertex( vertexCoordinates ) )
		{
		EntitiesProcessor::processVertex( *grid,
		*userData,
		grid->getVertexIndex( vertexCoordinates ),
		vertexCoordinates );
		}
		}
		}

		template< typename Real,
		typename Index,
		typename UserData,
		typename EntitiesProcessor >
		__global__ void tnlTraverserGrid3DInteriorVertices( const tnlGrid< 3, Real, tnlCuda, Index >* grid,
		UserData* userData,
		const Index gridXIdx,
		const Index gridYIdx,
		const Index gridZIdx )
		{
		typedef Real RealType;
		typedef Index IndexType;
		typedef tnlGrid< 3, Real, tnlCuda, Index > GridType;
		typedef typename GridType::CoordinatesType CoordinatesType;

		const IndexType& xSize = grid->getDimensions().x();
		const IndexType& ySize = grid->getDimensions().y();
		const IndexType& zSize = grid->getDimensions().z();

		CoordinatesType vertexCoordinates( ( gridXIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x,
		( gridYIdx * tnlCuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y,
		( gridZIdx * tnlCuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z );

		if( vertexCoordinates.x() < grid->getDimensions().x() &&
		vertexCoordinates.y() < grid->getDimensions().y() &&
		vertexCoordinates.z() < grid->getDimensions().z() )
		{
		if( ! grid->isBoundaryVertex( vertexCoordinates ) )
		{
		EntitiesProcessor::processVertex( *grid,
		*userData,
		grid->getVertexIndex( vertexCoordinates ),
		vertexCoordinates );
		}
		}
		}


		#endif

		template< typename Real,
		@@ -419,6 +540,35 @@ processBoundaryEntities( const GridType& grid,
		/****
		* Traversing boundary vertices
		*/
		#ifdef HAVE_CUDA
		GridType* kernelGrid = tnlCuda::passToDevice( grid );
		UserData* kernelUserData = tnlCuda::passToDevice( userData );

		dim3 cudaBlockSize( 8, 8, 4 );
		dim3 cudaBlocks;
		cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
		cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
		cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
		const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
		const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
		const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );

		for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
		for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
		for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
		{
		tnlTraverserGrid3DBoundaryVertices< Real, Index, UserData, EntitiesProcessor >
		<<< cudaBlocks, cudaBlockSize >>>
		( kernelGrid,
		kernelUserData,
		gridXIdx,
		gridYIdx,
		gridZIdx );
		}
		cudaThreadSynchronize();
		checkCudaDevice;
		#endif

		}

		template< typename Real,
		@@ -433,6 +583,35 @@ processInteriorEntities( const GridType& grid,
		/****
		* Traversing interior vertices
		*/
		#ifdef HAVE_CUDA
		GridType* kernelGrid = tnlCuda::passToDevice( grid );
		UserData* kernelUserData = tnlCuda::passToDevice( userData );

		dim3 cudaBlockSize( 8, 8, 4 );
		dim3 cudaBlocks;
		cudaBlocks.x = tnlCuda::getNumberOfBlocks( grid.getDimensions().x() + 1, cudaBlockSize.x );
		cudaBlocks.y = tnlCuda::getNumberOfBlocks( grid.getDimensions().y() + 1, cudaBlockSize.y );
		cudaBlocks.z = tnlCuda::getNumberOfBlocks( grid.getDimensions().z() + 1, cudaBlockSize.z );
		const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x );
		const IndexType cudaYGrids = tnlCuda::getNumberOfGrids( cudaBlocks.y );
		const IndexType cudaZGrids = tnlCuda::getNumberOfGrids( cudaBlocks.z );

		for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
		for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
		for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
		{
		tnlTraverserGrid3DInteriorVertices< Real, Index, UserData, EntitiesProcessor >
		<<< cudaBlocks, cudaBlockSize >>>
		( kernelGrid,
		kernelUserData,
		gridXIdx,
		gridYIdx,
		gridZIdx );
		}
		cudaThreadSynchronize();
		checkCudaDevice;
		#endif

		}