3D FSM+FIM implemented (f206b754) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+112 −102

Original line number	Diff line number	Diff line
		@@ -100,6 +100,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
		typedef Index IndexType;
		typedef Functions::MeshFunction< MeshType > MeshFunctionType;
		typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType;
		typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer;
		using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >;
		using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >;

		@@ -112,7 +113,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
		const MeshEntity& cell,
		const RealType velocity = 1.0);

		__cuda_callable__ bool updateCell( volatile Real sArray[10][10][10],
		template< int sizeSArray >
		void updateBlocks( const InterfaceMapType interfaceMap,
		const MeshFunctionType aux,
		MeshFunctionType& helpFunc,
		ArrayContainer BlockIterHost, int numThreadsPerBlock/, Real sArray/ );

		void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ );

		template< int sizeSArray >
		__cuda_callable__ bool updateCell3D( volatile Real *sArray,
		int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz,
		const Real velocity = 1.0 );
		};

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+443 −76

File changed.

Preview size limit exceeded, changes collapsed.

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h

+112 −110

Original line number	Diff line number	Diff line
		@@ -130,6 +130,8 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >
		using typename BaseType::MeshFunctionType;
		using typename BaseType::InterfaceMapPointer;
		using typename BaseType::MeshFunctionPointer;
		using typename BaseType::ArrayContainer;


		FastSweepingMethod();

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+63 −11

Original line number	Diff line number	Diff line
		@@ -15,9 +15,12 @@

		#include "tnlFastSweepingMethod.h"
		#include <TNL/Devices/Cuda.h>
		#include <string.h>
		#include <TNL/Communicators/MpiDefs.h>




		#include <string.h>
		#include <iostream>
		#include <fstream>

		@@ -80,16 +83,48 @@ solve( const MeshPointer& mesh,
		MeshFunctionType aux = *auxPtr;


		//#ifdef HAVE_MPI
		bool a = Communicators::MpiCommunicator::IsInitialized();
		if( a )
		printf("Je Init\n");
		else
		printf("Neni Init\n");
		//#endif

		while( iteration < this->maxIterations )
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		{
		int numThreadsPerBlock = 16;
		int numThreadsPerBlock = -1;

		numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0));
		//printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
		if( numThreadsPerBlock <= 16 )
		numThreadsPerBlock = 16;
		else if(numThreadsPerBlock <= 32 )
		numThreadsPerBlock = 32;
		else if(numThreadsPerBlock <= 64 )
		numThreadsPerBlock = 64;
		else if(numThreadsPerBlock <= 128 )
		numThreadsPerBlock = 128;
		else if(numThreadsPerBlock <= 256 )
		numThreadsPerBlock = 256;
		else if(numThreadsPerBlock <= 512 )
		numThreadsPerBlock = 512;
		else
		numThreadsPerBlock = 1024;
		//printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);

		if( numThreadsPerBlock == -1 ){
		printf("Fail in setting numThreadsPerBlock.\n");
		break;
		}



		int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0);
		int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0);

		//std::cout << "numBlocksX = " << numBlocksX << std::endl;

		/Real sArray = new Real[numBlocksX*numBlocksY];
		@@ -115,13 +150,29 @@ solve( const MeshPointer& mesh,
		}
		std::cout<<std::endl;*/
		unsigned int numWhile = 0;
		while( IsCalculationDone && numWhile < 1 )
		while( IsCalculationDone )
		{
		IsCalculationDone = 0;
		helpFunc1 = auxPtr;
		auxPtr = helpFunc;
		helpFunc = helpFunc1;
		switch ( numThreadsPerBlock ){
		case 16:
		this->template updateBlocks< 18 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		case 32:
		this->template updateBlocks< 34 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		case 64:
		this->template updateBlocks< 66 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		case 128:
		this->template updateBlocks< 130 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		case 256:
		this->template updateBlocks< 258 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		case 512:
		this->template updateBlocks< 514 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		default:
		this->template updateBlocks< 1028 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );
		}


		//Reduction
		for( int i = 0; i < BlockIterHost.getSize(); i++ ){
		@@ -131,14 +182,14 @@ solve( const MeshPointer& mesh,
		}
		}
		numWhile++;
		std::cout <<"numWhile = "<< numWhile <<std::endl;
		/*std::cout <<"numWhile = "<< numWhile <<std::endl;

		for( int j = numBlocksY-1; j>-1; j-- ){
		for( int i = 0; i < numBlocksX; i++ )
		std::cout << BlockIterHost[ j * numBlocksX + i ];
		std::cout << std::endl;
		}
		std::cout << std::endl;
		std::cout << std::endl;*/

		this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY );

		@@ -150,8 +201,8 @@ solve( const MeshPointer& mesh,
		std::cout << std::endl;*/

		//std::cout<<std::endl;
		string s( "aux-"+ std::to_string(numWhile) + ".tnl");
		aux.save( s );
		//string s( "aux-"+ std::to_string(numWhile) + ".tnl");
		//aux.save( s );
		}
		if( numWhile == 1 ){
		auxPtr = helpFunc;
		@@ -408,6 +459,7 @@ solve( const MeshPointer& mesh,
		}
		iteration++;
		}
		//#endif
		aux.save("aux-final.tnl");
		}

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h

+274 −181

Original line number	Diff line number	Diff line
		@@ -64,9 +64,6 @@ solve( const MeshPointer& mesh,
		interfaceMapPtr->setMesh( mesh );
		std::cout << "Initiating the interface cells ..." << std::endl;
		BaseType::initInterface( u, auxPtr, interfaceMapPtr );
		#ifdef HAVE_CUDA
		cudaDeviceSynchronize();
		#endif
		auxPtr->save( "aux-ini.tnl" );

		typename MeshType::Cell cell( *mesh );
		@@ -78,7 +75,96 @@ solve( const MeshPointer& mesh,
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		{
		for( cell.getCoordinates().z() = 0;
		int numThreadsPerBlock = 64;


		int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0);
		int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0);
		int numBlocksZ = mesh->getDimensions().z() / numThreadsPerBlock + (mesh->getDimensions().z() % numThreadsPerBlock != 0 ? 1:0);
		//std::cout << "numBlocksX = " << numBlocksX << std::endl;

		/Real sArray = new Real[numBlocksX*numBlocksY];
		for( int i = 0; i < numBlocksX * numBlocksY; i++ )
		sArray[ i ] = new Real [ (numThreadsPerBlock + 2)(numThreadsPerBlock + 2)];/

		ArrayContainer BlockIterHost;
		BlockIterHost.setSize( numBlocksX * numBlocksY * numBlocksZ );
		BlockIterHost.setValue( 1 );
		int IsCalculationDone = 1;

		MeshFunctionPointer helpFunc( mesh );
		MeshFunctionPointer helpFunc1( mesh );
		helpFunc1 = auxPtr;
		auxPtr = helpFunc;
		helpFunc = helpFunc1;
		//std::cout<< "Size = " << BlockIterHost.getSize() << std::endl;
		/*for( int k = numBlocksX-1; k >-1; k-- ){
		for( int l = 0; l < numBlocksY; l++ ){
		std::cout<< BlockIterHost[ l*numBlocksX + k ];
		}
		std::cout<<std::endl;
		}
		std::cout<<std::endl;*/
		unsigned int numWhile = 0;
		while( IsCalculationDone )
		{
		IsCalculationDone = 0;
		helpFunc1 = auxPtr;
		auxPtr = helpFunc;
		helpFunc = helpFunc1;
		this->template updateBlocks< 66 >( interfaceMap, auxPtr, helpFunc, BlockIterHost, numThreadsPerBlock/, sArray/ );

		//Reduction
		for( int i = 0; i < BlockIterHost.getSize(); i++ ){
		if( IsCalculationDone == 0 ){
		IsCalculationDone = IsCalculationDone \|\| BlockIterHost[ i ];
		//break;
		}
		}
		numWhile++;
		std::cout <<"numWhile = "<< numWhile <<std::endl;
		/*for( int k = 0; k < numBlocksZ; k++ ){
		for( int j = numBlocksY-1; j>-1; j-- ){
		for( int i = 0; i < numBlocksX; i++ ){
		//std::cout << (auxPtr)[ k numBlocksX * numBlocksY + j * numBlocksX + i ] << " ";
		std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ];
		}
		std::cout << std::endl;
		}
		std::cout << std::endl;
		}
		std::cout << std::endl;*/

		this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY, numBlocksZ );

		/*for( int k = 0; k < numBlocksZ; k++ ){
		for( int j = numBlocksY-1; j>-1; j-- ){
		for( int i = 0; i < numBlocksX; i++ ){
		//std::cout << (auxPtr)[ k numBlocksX * numBlocksY + j * numBlocksX + i ] << " ";
		std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ];
		}
		std::cout << std::endl;
		}
		std::cout << std::endl;
		}*/

		/*for( int j = numBlocksY-1; j>-1; j-- ){
		for( int i = 0; i < numBlocksX; i++ )
		std::cout << "BlockIterHost = "<< jnumBlocksX + i<< " ," << BlockIterHost[ j numBlocksX + i ];
		std::cout << std::endl;
		}
		std::cout << std::endl;*/

		//std::cout<<std::endl;
		//string s( "aux-"+ std::to_string(numWhile) + ".tnl");
		//aux.save( s );
		}
		if( numWhile == 1 ){
		auxPtr = helpFunc;
		}
		aux = *auxPtr;

		/*for( cell.getCoordinates().z() = 0;
		cell.getCoordinates().z() < mesh->getDimensions().z();
		cell.getCoordinates().z()++ )
		{
		@@ -241,7 +327,7 @@ solve( const MeshPointer& mesh,
		this->updateCell( aux, cell );
		}
		}
		}
		}*/
		}
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		@@ -389,7 +475,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		{
		__syncthreads();

		__shared__ volatile bool changed[ (sizeSArray - 2)(sizeSArray - 2)(sizeSArray - 2)];
		__shared__ volatile bool changed[ 888/(sizeSArray - 2)(sizeSArray - 2)(sizeSArray - 2)/];

		changed[ currentIndex ] = false;
		if( thrj == 0 && thri == 0 && thrk == 0 )
		@@ -402,6 +488,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<

		if( thrj == 1 && thri == 1 && thrk == 1 )
		{
		//printf( "We are in the calculation. Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x );
		hx = mesh.getSpaceSteps().x();
		hy = mesh.getSpaceSteps().y();
		hz = mesh.getSpaceSteps().z();
		@@ -410,8 +497,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		dimZ = mesh.getDimensions().z();
		BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0;
		}
		__shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray];
		sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max();
		__shared__ volatile Real sArray[ 101010/sizeSArray sizeSArray * sizeSArray*/ ];
		sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = std::numeric_limits< Real >::max();

		//filling sArray edges
		int numOfBlockx;
		@@ -426,6 +513,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		numOfBlockx = gridDim.x;
		numOfBlocky = gridDim.y;
		numOfBlockz = gridDim.z;
		__syncthreads();

		if( numOfBlockx - 1 == blIdx )
		xkolik = dimX - (blIdx)*blockDim.x+1;
		@@ -438,54 +526,55 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		if( thri == 0 )
		{
		if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik )
		sArray[thrk+1][thrj+1][0] = aux[ blIdzblockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x + thrj * dimX -1 + thrkdimXdimY ];
		sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)sizeSArray + 0] = aux[ blIdzblockDim.z * dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x + thrj * dimX -1 + thrkdimXdimY ];
		else
		sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max();
		sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max();
		}

		if( thri == 1 )
		{
		if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik )
		sArray[thrk+1][thrj+1][9] = aux[ blIdzblockDim.z dimX * dimY + blIdy blockDim.ydimX+ blIdxblockDim.x + blockDim.x + thrj dimX + thrkdimXdimY ];
		sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) sizeSArray + xkolik ] = aux[ blIdzblockDim.z * dimX * dimY + blIdy blockDim.ydimX+ blIdxblockDim.x + blockDim.x + thrj dimX + thrkdimXdimY ];
		else
		sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max();
		sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max();
		}
		if( thri == 2 )
		{
		if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik )
		sArray[thrk+1][0][thrj+1] = aux[ blIdzblockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x - dimX + thrj + thrkdimXdimY ];
		sArray[ (thrk+1) * sizeSArray * sizeSArray +0sizeSArray + thrj+1] = aux[ blIdzblockDim.z * dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x - dimX + thrj + thrkdimXdimY ];
		else
		sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max();
		sArray[ (thrk+1) * sizeSArray * sizeSArray + 0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max();
		}

		if( thri == 3 )
		{
		if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik )
		sArray[thrk+1][9][thrj+1] = aux[ blIdzblockDim.z dimX * dimY + (blIdy+1) * blockDim.ydimX + blIdxblockDim.x + thrj + thrkdimXdimY ];
		sArray[ (thrk+1) * sizeSArray * sizeSArray + ykoliksizeSArray + thrj+1] = aux[ blIdzblockDim.z * dimX * dimY + (blIdy+1) * blockDim.ydimX + blIdxblockDim.x + thrj + thrkdimXdimY ];
		else
		sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max();
		sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max();
		}
		if( thri == 4 )
		{
		if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik )
		sArray[0][thrj+1][thrk+1] = aux[ blIdzblockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x - dimX * dimY + thrj * dimX + thrk ];
		sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = aux[ blIdzblockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x - dimX * dimY + thrj * dimX + thrk ];
		else
		sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max();
		sArray[0 * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thrk+1] = std::numeric_limits< Real >::max();
		}

		if( thri == 5 )
		{
		if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik )
		sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)blockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x + thrj * dimX + thrk ];
		sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = aux[ (blIdz+1)blockDim.z dimX * dimY + blIdy * blockDim.ydimX + blIdxblockDim.x + thrj * dimX + thrk ];
		else
		sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max();
		sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = std::numeric_limits< Real >::max();
		}

		if( i < dimX && j < dimY && k < dimZ )
		{
		sArray[thrk+1][thrj+1][thri+1] = aux[ kdimXdimY + j*dimX + i ];
		sArray[(thrk+1) * sizeSArray * sizeSArray + (thrj+1) sizeSArray + thri+1] = aux[ kdimXdimY + jdimX + i ];
		}
		__syncthreads();

		while( changed[ 0 ] )
		{
		__syncthreads();
		@@ -493,11 +582,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		changed[ currentIndex ] = false;

		//calculation of update cell
		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ )
		if( i < dimX && j < dimY && k < dimZ )
		{
		if( ! interfaceMap[ kdimXdimY + j * mesh.getDimensions().x() + i ] )
		if( ! interfaceMap[ kdimXdimY + j * dimX + i ] )
		{
		changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz);
		changed[ currentIndex ] = ptr.updateCell3D< sizeSArray >( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz);
		}
		}
		__syncthreads();
		@@ -535,7 +624,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		}
		}
		__syncthreads();
		if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU
		if( currentIndex < 32 )
		{
		if( true ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 32 ];
		if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 16 ];
		@@ -548,7 +637,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<

		/*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0)
		{
		for(int m = 0; m < 8; m++){
		//for(int m = 0; m < 8; m++){
		int m = 4;
		for(int n = 0; n<8; n++){
		for(int b=0; b<8; b++)
		printf(" %i ", changed[m64 + n8 + b]);
		@@ -556,16 +646,19 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		}
		printf("\n \n");
		}
		}*/
		//}*/

		if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 )
		{
		BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1;
		//printf( "Setting block calculation. Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x );
		BlockIterDevice[ blIdz * gridDim.x * gridDim.y + blIdy * gridDim.x + blIdx ] = 1;
		}
		__syncthreads();
		}

		if( i < dimX && j < dimY && k < dimZ )
		helpFunc[ kdimXdimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ];
		helpFunc[ kdimXdimY + j * dimX + i ] = sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thri+1 ];

		}
		}
		#endif