Added several parallel implementations for fast-sweeping method (229e25d6) · Commits · TNL / tnl-dev

examples/fast-sweeping/fastSweepingConfig.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -30,6 +30,7 @@ class fastSweepingConfig
		config.addEntry < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
		config.addRequiredEntry < tnlString > ( "initial-condition", "Initial condition for solver");
		config.addEntry < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" );
		config.addEntry < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
		}
		};

examples/fast-sweeping/main.h

+5 −1

Original line number	Diff line number	Diff line
		@@ -16,7 +16,10 @@


		#include "MainBuildConfig.h"
		#include "tnlFastSweeping.h"
		//for HOST versions:
		//#include "tnlFastSweeping.h"
		//for DEVICE versions:
		#include "tnlFastSweeping_CUDA.h"
		#include "fastSweepingConfig.h"
		#include <solvers/tnlConfigTags.h>

		@@ -46,6 +49,7 @@ int main( int argc, char* argv[] )
		cerr << "Solver failed to initialize." << endl;
		return EXIT_FAILURE;
		}
		checkCudaDevice;
		cout << "-------------------------------------------------------------" << endl;
		cout << "Starting solver..." << endl;
		solver.run();

examples/fast-sweeping/tnlFastSweeping.h

+19 −3

Original line number	Diff line number	Diff line
		@@ -24,6 +24,9 @@
		#include <limits.h>
		#include <core/tnlDevice.h>
		#include <ctime>
		#ifdef HAVE_OPENMP
		#include <omp.h>
		#endif



		@@ -60,7 +63,12 @@ public:

		bool initGrid();
		bool run();
		void updateValue(const Index i, const Index j);

		//for single core version use this implementation:
		//void updateValue(const Index i, const Index j);
		//for parallel version use this one instead:
		void updateValue(const Index i, const Index j, DofVectorType* grid);

		Real fabsMin(const Real x, const Real y);


		@@ -68,14 +76,22 @@ protected:

		MeshType Mesh;

		bool exactInput;

		DofVectorType dofVector;

		RealType h;

		#ifdef HAVE_OPENMP
		// omp_lock_t* gridLock;
		#endif

		};

		};

		#include "tnlFastSweeping2D_impl.h"
		//for single core version use this implementation:
		//#include "tnlFastSweeping2D_impl.h"
		//for parallel version use this one instead:
		#include "tnlFastSweeping2D_openMP_impl.h"

		#endif /* TNLFASTSWEEPING_H_ */

examples/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h

0 → 100644

+522 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		tnlFastSweeping_impl.h - description
		-------------------
		begin : Oct 15 , 2015
		copyright : (C) 2015 by Tomas Sobotik
		***************************************************************************/

		/***************************************************************************
		* *
		* This program is free software; you can redistribute it and/or modify *
		* it under the terms of the GNU General Public License as published by *
		* the Free Software Foundation; either version 2 of the License, or *
		* (at your option) any later version. *
		* *
		***************************************************************************/
		#ifndef TNLFASTSWEEPING2D_IMPL_H_
		#define TNLFASTSWEEPING2D_IMPL_H_

		#include "tnlFastSweeping.h"


		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		tnlString tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
		{
		return tnlString( "tnlFastSweeping< " ) +
		MeshType::getType() + ", " +
		::getType< Real >() + ", " +
		::getType< Index >() + " >";
		}




		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
		{
		const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );

		if( ! Mesh.load( meshFile ) )
		{
		cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
		return false;
		}


		const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
		if( ! dofVector.load( initialCondition ) )
		{
		cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
		return false;
		}

		h = Mesh.getHx();
		counter = 0;

		const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );

		if(exact_input == "no")
		exactInput=false;
		else
		exactInput=true;


		#ifdef HAVE_CUDA

		cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double));
		cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);

		cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double));
		cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);


		cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
		cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);

		#endif

		int n = Mesh.getDimensions().x();
		dim3 threadsPerBlock(16, 16);
		dim3 numBlocks(n/16 + 1 ,n/16 +1);

		initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
		cudaDeviceSynchronize();
		checkCudaDevice;

		return true;
		}





		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
		{
		//
		// for(Index i = 0; i < Mesh.getDimensions().x(); i++)
		// {
		// for(Index j = 0; j < Mesh.getDimensions().y(); j++)
		// {
		// updateValue(i,j);
		// }
		// }
		//
		///---------------------------------------------------------------------------------------------------------------------------/
		//
		// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
		// {
		// for(Index j = 0; j < Mesh.getDimensions().y(); j++)
		// {
		// updateValue(i,j);
		// }
		// }
		//
		///---------------------------------------------------------------------------------------------------------------------------/
		//
		// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
		// {
		// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
		// {
		// updateValue(i,j);
		// }
		// }
		//
		///---------------------------------------------------------------------------------------------------------------------------/
		// for(Index i = 0; i < Mesh.getDimensions().x(); i++)
		// {
		// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
		// {
		// updateValue(i,j);
		// }
		// }
		//
		///---------------------------------------------------------------------------------------------------------------------------/
		//
		//
		// dofVector.save("u-00001.tnl");

		int n = Mesh.getDimensions().x();
		dim3 threadsPerBlock(32, 32);
		dim3 numBlocks(n/32 + 1 ,n/32 +1);

		for(int i = 2*n - 1; i > -1; i--)
		{
		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i);
		cudaDeviceSynchronize();
		}
		cudaDeviceSynchronize();
		checkCudaDevice;
		for(int i = 0; i < 2*n ; i++)
		{
		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i);
		cudaDeviceSynchronize();
		}
		cudaDeviceSynchronize();
		checkCudaDevice;
		for(int i = 0; i < 2*n ; i++)
		{
		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i);
		cudaDeviceSynchronize();
		}
		cudaDeviceSynchronize();
		checkCudaDevice;
		for(int i = 2*n - 1; i > -1; i--)
		{
		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,i);
		cudaDeviceSynchronize();
		}

		cudaDeviceSynchronize();
		checkCudaDevice;

		cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
		cudaDeviceSynchronize();
		cudaFree(cudaDofVector);
		cudaFree(cudaDofVector2);
		cudaFree(cudaSolver);
		dofVector.save("u-00001.tnl");
		cudaDeviceSynchronize();
		return true;
		}




		#ifdef HAVE_CUDA


		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		__device__
		void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
		{
		Index index = Mesh.getCellIndex(CoordinatesType(i,j));
		Real value = cudaDofVector[index];
		Real a,b, tmp;

		if( i == 0 )
		a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)];
		else if( i == Mesh.getDimensions().x() - 1 )
		a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)];
		else
		{
		a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)],
		cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] );
		}

		if( j == 0 )
		b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)];
		else if( j == Mesh.getDimensions().y() - 1 )
		b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)];
		else
		{
		b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)],
		cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] );
		}


		if(abs(a-b) >= h)
		tmp = fabsMin(a,b) + Sign(value)*h;
		else
		tmp = 0.5 * (a + b + Sign(value)sqrt(2.0 h * h - (a - b) * (a - b) ) );

		cudaDofVector[index] = fabsMin(value, tmp);

		}


		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		__device__
		bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
		{
		int gx = threadIdx.x + blockDim.x*blockIdx.x;
		int gy = blockDim.y*blockIdx.y + threadIdx.y;
		int gid = Mesh.getCellIndex(CoordinatesType(gx,gy));

		int total = blockDim.x*gridDim.x;



		Real tmp = 0.0;
		int flag = 0;
		counter = 0;
		tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);


		if(!exactInput)
		{
		cudaDofVector[gid]=cudaDofVector[gid]=0.5hSign(cudaDofVector[gid]);
		}
		__threadfence();
		// printf("-----------------------------------------------------------------------------------\n");

		__threadfence();

		if(gx > 0 && gx < Mesh.getDimensions().x()-1)
		{
		if(gy > 0 && gy < Mesh.getDimensions().y()-1)
		{

		Index j = gy;
		Index i = gx;
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);

		if(tmp == 0.0)
		{}
		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
		{}
		else
		flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
		}
		}

		// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
		// printf("****************************************************************\n");
		// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
		if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0)
		{
		// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
		Index j = 0;
		Index i = gx;
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);


		if(tmp == 0.0)
		{}
		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
		{}
		else
		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
		}

		// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
		if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1)
		{
		Index i = gx;
		Index j = Mesh.getDimensions().y() - 1;
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);


		if(tmp == 0.0)
		{}
		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
		{}
		else
		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
		}

		// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
		if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0)
		{
		Index j = gy;
		Index i = 0;
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);


		if(tmp == 0.0)
		{}
		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
		{}
		else
		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
		}
		// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
		if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1)
		{
		Index j = gy;
		Index i = Mesh.getDimensions().x() - 1;
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);


		if(tmp == 0.0)
		{}
		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 \|\|
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
		{}
		else
		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
		}

		// printf("##################################################################################################\n");
		if(gx == Mesh.getDimensions().x() - 1 &&
		gy == Mesh.getDimensions().y() - 1)
		{

		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)

		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
		}
		if(gx == Mesh.getDimensions().x() - 1 &&
		gy == 0)
		{

		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)

		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
		}
		// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
		if(gx == 0 &&
		gy == Mesh.getDimensions().y() - 1)
		{

		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)

		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
		}
		if(gx == 0 &&
		gy == 0)
		{
		// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
		cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)

		flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
		}

		__threadfence();

		if(flag==1)
		cudaDofVector[gid] = tmp*3;
		}


		template< typename MeshReal,
		typename Device,
		typename MeshIndex,
		typename Real,
		typename Index >
		__device__
		Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
		{
		Real fx = abs(x);
		Real fy = abs(y);

		Real tmpMin = Min(fx,fy);

		if(tmpMin == fx)
		return x;
		else
		return y;


		}



		__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
		{

		int gx = threadIdx.x + blockDim.x*blockIdx.x;
		int gy = blockDim.y*blockIdx.y + threadIdx.y;
		if(solver->Mesh.getDimensions().x() <= gx \|\| solver->Mesh.getDimensions().y() <= gy)
		return;
		int total = solver->Mesh.getDimensions().x();
		//int gid = solver->Mesh.getDimensions().x() * gy + gx;
		int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();

		int id1 = gx+gy;
		int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;

		/---------------------------------------------------------------------------------------------------------------------------/
		if(sweep == 1)
		// for(int i = 0; i < 2*total - 1; i++)
		{
		if(id1 == i)
		{
		solver->updateValue(gx,gy);
		return;
		}

		}
		/---------------------------------------------------------------------------------------------------------------------------/
		else if(sweep == 2)
		// for(int i = 0; i < 2*total - 1; i++)
		{
		if(id2 == i)
		{
		solver->updateValue(gx,gy);
		return;
		}
		}
		/---------------------------------------------------------------------------------------------------------------------------/
		else if(sweep == 3)
		// for(int i = 2*total - 2; i > -1; i--)
		{
		if(id1 == i)
		{
		solver->updateValue(gx,gy);
		return;
		}
		}
		/---------------------------------------------------------------------------------------------------------------------------/
		else if(sweep == 4)
		// for(int i = 2*total - 2; i > -1; i--)
		{
		if(id2 == i)
		{
		solver->updateValue(gx,gy);
		return;
		}
		}
		/---------------------------------------------------------------------------------------------------------------------------/




		}


		__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
		{
		int gx = threadIdx.x + blockDim.x*blockIdx.x;
		int gy = blockDim.y*blockIdx.y + threadIdx.y;

		if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
		{
		solver->initGrid();
		}


		}
		#endif




		#endif /* TNLFASTSWEEPING_IMPL_H_ */

examples/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h

0 → 100644

+588 −0

File added.

Preview size limit exceeded, changes collapsed.