Tweaks (042a23f5) · Commits · TNL / tnl-dev

examples/fast-sweeping-map/main.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -17,9 +17,9 @@

		#include "MainBuildConfig.h"
		//for HOST versions:
		#include "tnlFastSweepingMap.h"
		//#include "tnlFastSweepingMap.h"
		//for DEVICE versions:
		//#include "tnlFastSweepingMap_CUDA.h"
		#include "tnlFastSweepingMap_CUDA.h"
		#include "fastSweepingMapConfig.h"
		#include <solvers/tnlBuildConfigTags.h>

examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -904,6 +904,8 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS
		double fu = 0.0;

		double finalTime = this->stopTime;
		if(boundaryCondition == 0)
		finalTime*=2.0;
		__syncthreads();

		tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);

examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
		tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
		{
		cout << "a" << endl;
		this->device = tnlHostDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU
		this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU

		#ifdef HAVE_CUDA
		if(this->device == tnlCudaDevice)

examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h

+70 −53

Original line number	Diff line number	Diff line
		@@ -16,7 +16,7 @@
		#ifndef TNLNARROWBAND2D_IMPL_H_
		#define TNLNARROWBAND2D_IMPL_H_

		#define NARROWBAND_SUBGRID_SIZE 8
		#define NARROWBAND_SUBGRID_SIZE 16

		#include "tnlNarrowBand.h"

		@@ -225,7 +225,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
		cudaDeviceSynchronize();
		checkCudaDevice;
		if(reinit != 0 && time != finalTime )
		if(reinit != 0 /*&& time != finalTime */)
		{
		cout << time << endl;

		@@ -273,8 +273,8 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
		{
		// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve,
		// 8 - to the east of curve, 16 - to the west of curve.
		int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
		/*if(cudaStatusVector[subgridID] != 0)*/
		int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize;
		if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y())
		{
		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
		Entity.setCoordinates(CoordinatesType(i,j));
		@@ -353,32 +353,33 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i

		int gid = Entity.getIndex();

		if(abs(cudaDofVector2[gid]) > 1.5*h)
		cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector2[gid]);

		if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
		{
		if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
		{
		cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h;
		}
		if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
		{
		cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
		}

		if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
		{
		cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h;
		}
		if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
		{
		cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
		}
		}
		// if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
		// {
		// if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
		// {
		// cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		// cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h;
		// }
		// if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
		// {
		// cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		// cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
		// }
		//
		// if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
		// {
		// cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		// cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h;
		// }
		// if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
		// {
		// cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
		// cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
		// }
		// }


		//
		@@ -647,7 +648,9 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in
		// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve,
		// 8 - to the east of curve, 16 - to the west of curve.
		if(blockIdx.x > 0)
		{
		atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16);
		}

		if(blockIdx.x < gridDim.x - 1)
		atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8);
		@@ -690,26 +693,40 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
		tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
		double value = solver->cudaDofVector2[Entity.getIndex()];
		double xf,xb,yf,yb, grad, fu, a,b;
		a = b = 0.0;

		if( i == 0 || (threadIdx.x == 0 && !(status & 9)) )
		yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value;
		{
		xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()];
		xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value;
		}
		else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) )
		yb = yf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()];
		{
		xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()];
		xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()] - value;
		}
		else
		{
		yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()];
		yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value;
		xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()];
		xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value;
		}

		if( j == 0 || (threadIdx.y == 0 && !(status & 3)) )
		xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value;
		{
		yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] ;
		yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value;
		}
		else if( j == solver->Mesh.getDimensions().y() - 1 || (threadIdx.y == blockDim.y - 1 && !(status & 5)) )
		xb = xf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
		{
		yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
		yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()] - value;
		}
		else
		{
		xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
		xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value;
		yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
		yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value;
		}
		__syncthreads();



		@@ -739,21 +756,21 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
		}


		/* if(xb > xf)
		a = xb;
		if(xb > xf)
		a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
		else
		a = xf;
		a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();

		if(yb > yf)
		b = yb;
		b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
		else
		b = yf;*/
		b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();



		grad = sqrt(/*0.5 **/ (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
		// grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();

		// grad = sqrt(/*0.5 **/ (a*a + b*b ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
		grad = sqrt(/*0.5 **/ (a*a + b*b ) );

		fu = -1.0 * grad;

		@@ -762,17 +779,17 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
		// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve,
		// 8 - to the east of curve, 16 - to the west of curve.

		if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) )
		if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) )
		atomicMax(solver->reinitialize,1);
		else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
		else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
		atomicMax(solver->reinitialize,1);
		else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) )
		else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) )
		atomicMax(solver->reinitialize,1);
		else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
		else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
		atomicMax(solver->reinitialize,1);
		}

		solver->cudaDofVector2[Entity.getIndex()] = value+tau*fu;
		solver->cudaDofVector2[Entity.getIndex()] += tau*fu;
		}
		}
		}

examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h

0 → 100644

+1313 −0

File added.

Preview size limit exceeded, changes collapsed.