Commit 042a23f5 authored by Tomas Sobotik
Browse files

Tweaks

parent a9340955
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -17,9 +17,9 @@

#include "MainBuildConfig.h"
	//for HOST versions:
#include "tnlFastSweepingMap.h"
//#include "tnlFastSweepingMap.h"
	//for DEVICE versions:
//#include "tnlFastSweepingMap_CUDA.h"
#include "tnlFastSweepingMap_CUDA.h"
#include "fastSweepingMapConfig.h"
#include <solvers/tnlBuildConfigTags.h>

+2 −0
Original line number Diff line number Diff line
@@ -904,6 +904,8 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS
   double fu = 0.0;

   double finalTime = this->stopTime;
   if(boundaryCondition == 0)
	   finalTime*=2.0;
   __syncthreads();

   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
+1 −1
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
{
	cout << "a" << endl;
	this->device = tnlHostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice   ---    vypocet na CPU
	this->device = tnlCudaDevice;  /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice   ---    vypocet na CPU

#ifdef HAVE_CUDA
	if(this->device == tnlCudaDevice)
+70 −53
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@
#ifndef TNLNARROWBAND2D_IMPL_H_
#define TNLNARROWBAND2D_IMPL_H_

#define NARROWBAND_SUBGRID_SIZE 8
#define NARROWBAND_SUBGRID_SIZE 16

#include "tnlNarrowBand.h"

@@ -225,7 +225,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
		cudaDeviceSynchronize();
		checkCudaDevice;
		if(reinit != 0 && time != finalTime )
		if(reinit != 0 /*&& time != finalTime */)
		{
			cout << time << endl;

@@ -273,8 +273,8 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
{
	//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
	//								8 - to the east of curve, 	16 - to the west of curve.
	int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
	/*if(cudaStatusVector[subgridID] != 0)*/
	int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize;
	if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y())
	{
		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
		Entity.setCoordinates(CoordinatesType(i,j));
@@ -353,32 +353,33 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i

	int gid = Entity.getIndex();

	if(abs(cudaDofVector2[gid]) > 1.5*h)
		cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector2[gid]);

	if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
	{
		if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
		{
			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
			cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h;
		}
		if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
		{
			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
			cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
		}

		if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
		{
			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
			cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h;
		}
		if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
		{
			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
			cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
		}
	}
//	if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
//	{
//		if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
//		{
//			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
//			cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h;
//		}
//		if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
//		{
//			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
//			cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
//		}
//
//		if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
//		{
//			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
//			cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h;
//		}
//		if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
//		{
//			cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h;
//			cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
//		}
//	}


//
@@ -647,7 +648,9 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in
//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
//								8 - to the east of curve, 	16 - to the west of curve.
			if(blockIdx.x > 0)
			{
				atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16);
			}

			if(blockIdx.x < gridDim.x - 1)
				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8);
@@ -690,26 +693,40 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
			tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
			double value = solver->cudaDofVector2[Entity.getIndex()];
			double xf,xb,yf,yb, grad, fu, a,b;
			a = b = 0.0;

			if( i == 0 || (threadIdx.x == 0 && !(status & 9)) )
				yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
			{
				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
			}
			else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) )
				yb = yf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
			{
				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()] - value;
			}
			else
			{
				yb =  value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
				yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
				xb =  value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
				xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
			}

			if( j == 0 || (threadIdx.y == 0 && !(status & 3)) )
				xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
			{
				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] ;
				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
			}
			else if( j == solver->Mesh.getDimensions().y() - 1  || (threadIdx.y == blockDim.y - 1 && !(status & 5)) )
				xb = xf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
			{
				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()] - value;
			}
			else
			{
				xb =  value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
				xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
				yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
			}
			__syncthreads();



@@ -739,21 +756,21 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
			   }


			 /*  if(xb > xf)
				   a = xb;
			   if(xb > xf)
				   a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
			   else
				   a = xf;
				   a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();

			   if(yb > yf)
				   b = yb;
				   b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
			   else
				   b = yf;*/
				   b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();



			   grad = sqrt(/*0.5 **/ (xf*xf + xb*xb    +   yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
//			grad = sqrt(0.5 * (xf*xf + xb*xb    +   yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();

//			   grad = sqrt(/*0.5 **/ (a*a    +   b*b ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
			grad = sqrt(/*0.5 **/ (a*a    +   b*b ) );

			fu = -1.0 * grad;

@@ -762,17 +779,17 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
				//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
				//								8 - to the east of curve, 	16 - to the west of curve.

				if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) )
				if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) )
					atomicMax(solver->reinitialize,1);
				else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
				else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
					atomicMax(solver->reinitialize,1);
				else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) )
				else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) )
					atomicMax(solver->reinitialize,1);
				else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
				else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
					atomicMax(solver->reinitialize,1);
			}

			solver->cudaDofVector2[Entity.getIndex()]  = value+tau*fu;
			solver->cudaDofVector2[Entity.getIndex()]  += tau*fu;
		}
	}
}
+1313 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading