Chess model implemented in 2D. (bf5e6fd6) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+4 −4

Original line number	Diff line number	Diff line
		@@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		Real *aux,
		int *BlockIterDevice);
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		int *BlockIterDevice, int oddEvenBlock);
		__global__ void CudaParallelReduc( int BlockIterDevice, int dBlock, int nBlocks );

		template < typename Real, typename Device, typename Index >
		__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );
		/*template < typename Real, typename Device, typename Index >
		__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real dAux, int a );/

		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+6 −6

Original line number	Diff line number	Diff line
		@@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		{
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01*hx )
		if ( fabs( tmp ) > 0.001*hx )
		return true;
		else
		return false;
		@@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01*hx )
		if ( fabs( tmp ) > 0.001*hx )
		return true;
		else
		return false;
		@@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v )
		sArray[ thri ] = argAbsMin( value, tmp );

		tmp = value - sArray[ thri ];
		if ( fabs( tmp ) > 0.01*h )
		if ( fabs( tmp ) > 0.001*h )
		return true;
		else
		return false;
		@@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
		{
		sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrk ][ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01*hx )
		if ( fabs( tmp ) > 0.001*hx )
		return true;
		else
		return false;
		@@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
		{
		sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrk ][ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01*hx )
		if ( fabs( tmp ) > 0.001*hx )
		return true;
		else
		return false;
		@@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk,
		hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx );
		sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrk ][ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01*hx )
		if ( fabs( tmp ) > 0.001*hx )
		return true;
		else
		return false;

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+114 −98

Original line number	Diff line number	Diff line
		@@ -26,7 +26,7 @@ template< typename Real,
		typename Anisotropy >
		FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >::
		FastSweepingMethod()
		: maxIterations( 100 )
		: maxIterations( 1 )
		{

		}
		@@ -250,7 +250,7 @@ solve( const MeshPointer& mesh,

		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;

		aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 );
		//aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 );

		//int BlockIter = 1;// = (bool)malloc( ( numBlocksX numBlocksY ) * sizeof( bool ) );

		@@ -261,7 +261,7 @@ solve( const MeshPointer& mesh,
		int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0);
		int *dBlock;
		cudaMalloc(&dBlock, nBlocks * sizeof( int ) );

		int oddEvenBlock = 0;
		while( BlockIterD )
		{
		/for( int i = 0; i < numBlocksX numBlocksY; i++ )
		@@ -269,19 +269,30 @@ solve( const MeshPointer& mesh,

		CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
		interfaceMapPtr.template getData< Device >(),
		dAux,
		BlockIterDevice );
		auxPtr.template modifyData< Device>(),
		BlockIterDevice,
		oddEvenBlock );
		TNL_CHECK_CUDA_DEVICE;
		oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
		CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
		interfaceMapPtr.template getData< Device >(),
		auxPtr.template modifyData< Device>(),
		BlockIterDevice,
		oddEvenBlock );
		TNL_CHECK_CUDA_DEVICE;
		oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;

		CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
		TNL_CHECK_CUDA_DEVICE;
		CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );

		TNL_CHECK_CUDA_DEVICE;
		cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost);

		/for( int i = 1; i < numBlocksX numBlocksY; i++ )
		BlockIter[ 0 ] = BlockIter[ 0 ] \|\| BlockIter[ i ];*/

		}
		aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 );
		//aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 );
		cudaFree( dAux );
		cudaFree( BlockIterDevice );
		cudaFree( dBlock );
		@@ -299,7 +310,7 @@ solve( const MeshPointer& mesh,
		}

		#ifdef HAVE_CUDA
		template < typename Real, typename Device, typename Index >
		/*template < typename Real, typename Device, typename Index >
		__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a )
		{
		int i = threadIdx.x + blockDim.x*blockIdx.x;
		@@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In
		aux[ jmesh.getDimensions().x() + i ] = dAux[ jmesh.getDimensions().x() + i ];
		}

		}
		}*/

		__global__ void CudaParallelReduc( int BlockIterDevice, int dBlock, int nBlocks )
		{
		@@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int BlockIterDevice, int dBlock, int nBlock
		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		Real *aux,
		int *BlockIterDevice )
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		int *BlockIterDevice, int oddEvenBlock )
		{
		int thri = threadIdx.x; int thrj = threadIdx.y;
		int blIdx = blockIdx.x; int blIdy = blockIdx.y;
		@@ -417,6 +428,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		}
		__syncthreads();

		if( (blIdy%2 + blIdx) % 2 == oddEvenBlock )
		{

		if( thri == 0 )
		{
		if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik )
		@@ -521,5 +535,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<

		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) )
		aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ];

		}
		}
		#endif