Mistake found and repaied from 2D. Problem with EOC appeared. (b387101c) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -69,7 +69,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
		const MeshEntity& cell,
		const RealType velocity = 1.0 );

		__cuda_callable__ double updateCell( volatile Real sArray[18][18],
		__cuda_callable__ bool updateCell( volatile Real sArray[18][18],
		int thri, int thrj, const Real hx, const Real hy,
		const Real velocity = 1.0 );

		@@ -117,7 +117,7 @@ template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		Real *BlockIterDevice );
		bool *BlockIterDevice );

		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+10 −20

Original line number	Diff line number	Diff line
		@@ -859,22 +859,12 @@ template< typename Real,
		typename Device,
		typename Index >
		__cuda_callable__
		double
		bool
		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
		updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy,
		const Real v )
		{

		//const Meshes::Grid< 2, Real, Device, Index >& mesh = u.template getMesh< Devices::Cuda >();
		/*typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
		Cell cell( mesh );
		cell.getCoordinates().x() = i; cell.getCoordinates().y() = j;
		cell.refresh();

		const auto& neighborEntities = cell.template getNeighborEntities< 2 >();
		const RealType& hx = mesh.getSpaceSteps().x();
		const RealType& hy = mesh.getSpaceSteps().y();*/
		const RealType value = sArray[ thrj ][ thri ];//u( cell );
		const RealType value = sArray[ thrj ][ thri ];
		RealType a, b, tmp = TypeInfo< RealType >::getMaxValue();

		b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ],
		@@ -885,7 +875,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con

		if( fabs( a ) == TypeInfo< RealType >::getMaxValue() &&
		fabs( b ) == TypeInfo< RealType >::getMaxValue() )
		return 0;
		return false;

		RealType pom[6] = { a, b, TypeInfo< Real >::getMaxValue(), (RealType)hx, (RealType)hy, 0.0 };
		sortMinims( pom );
		@@ -896,10 +886,10 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		{
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.1 )
		return 10;
		if ( fabs( tmp ) > 0.001 )
		return true;
		else
		return 0;
		return false;
		}
		else
		{
		@@ -908,12 +898,12 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.1 )
		return 10;
		if ( fabs( tmp ) > 0.01 )
		return true;
		else
		return 0;
		return false;
		}

		return 0;
		return false;
		}
		#endif

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+47 −92

Original line number	Diff line number	Diff line
		@@ -227,22 +227,17 @@ solve( const MeshPointer& mesh,
		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;


		/*int nBlockIter = numBlocksX > numBlocksY ? numBlocksX : numBlocksY;
		nBlockIter = numBlocksX == numBlocksY ? nBlockIter + 1 : nBlockIter;*/

		bool isNotDone = true; numBlocksX = 16; numBlocksY = 16;
		double* BlockIter = (double)malloc( ( numBlocksX numBlocksY ) * sizeof( double ) );
		bool* BlockIter = (bool)malloc( ( numBlocksX numBlocksY ) * sizeof( bool ) );

		double *BlockIterDevice;
		cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( double ) );
		bool *BlockIterDevice;
		cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ) );

		//while( isNotDone )
		while( BlockIter[ 0 ] )
		{
		for( int i = 0; i < numBlocksX * numBlocksY; i++ )
		BlockIter[ i ] = 0.0;
		cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( double ), cudaMemcpyHostToDevice);

		isNotDone = false;
		BlockIter[ i ] = false;
		cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyHostToDevice);

		CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
		interfaceMapPtr.template getData< Device >(),
		@@ -250,23 +245,20 @@ solve( const MeshPointer& mesh,
		BlockIterDevice );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( double ), cudaMemcpyDeviceToHost);
		cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost);

		for( int j = numBlocksY-1; j > -1; j-- )
		/*for( int j = numBlocksY-1; j > -1; j-- )
		{
		for( int i = 0; i < numBlocksX; i++ )
		{
		//if( BlockIter[ j * numBlocksX + i ] )
		std::cout << BlockIter[ j * numBlocksY + i ] << "\t";
		//else
		// std::cout << "0" << " ";
		}
		std::cout << std::endl;
		}
		std::cout << std::endl;
		std::cout << std::endl;*/

		for( int i = 0; i < numBlocksX * numBlocksY; i++ )
		isNotDone = isNotDone \|\| BlockIter[ i ];
		for( int i = 1; i < numBlocksX * numBlocksY; i++ )
		BlockIter[ 0 ] = BlockIter[ 0 ] \|\| BlockIter[ i ];

		}
		delete[] BlockIter;
		@@ -289,7 +281,7 @@ template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		Real *BlockIterDevice )
		bool *BlockIterDevice )
		{
		int thri = threadIdx.x; int thrj = threadIdx.y;
		int blIdx = blockIdx.x; int blIdy = blockIdx.y;
		@@ -297,15 +289,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		int j = blockDim.y*blIdy + thrj;
		int currentIndex = thrj * 16 + thri;

		__shared__ volatile Real changed[256];
		changed[ currentIndex ] = 0.0;
		__syncthreads();
		__shared__ volatile bool changed[256];
		changed[ currentIndex ] = false;

		if( thrj == 0 && thri == 0 )
		changed[ 0 ] = 10.0;
		changed[ 0 ] = true;
		__syncthreads();
		//SmallCycleBoolin = true;


		const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
		__shared__ Real hx;
		@@ -315,11 +304,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		hx = mesh.getSpaceSteps().x();
		hy = mesh.getSpaceSteps().y();
		}
		__syncthreads();

		__shared__ volatile Real sArray[ 18 ][ 18 ];
		sArray[thrj][thri] = TypeInfo< Real >::getMaxValue();
		__syncthreads();

		//filling sArray edges
		int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y();
		@@ -339,12 +326,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<

		if( numOfBlocky -1 == blIdy )
		ykolik = dimY - (blIdy)*blockDim.y+1;

		//filling BlockIterDevice
		//BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0.0;
		}
		/*if( blIdx == 2 && blIdy == 3 )
		BlockIterDevice[ currentIndex ] = 0.0;*/
		__syncthreads();

		if( thri == 0 )
		@@ -354,7 +336,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		else
		sArray[thrj+1][xkolik] = TypeInfo< Real >::getMaxValue();
		}
		__syncthreads();

		if( thri == 1 )
		{
		@@ -363,7 +344,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		else
		sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue();
		}
		__syncthreads();

		if( thrj == 0 )
		{
		@@ -372,7 +352,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		else
		sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue();
		}
		__syncthreads();

		if( thrj == 1 )
		{
		@@ -381,44 +360,47 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		else
		sArray[0][thri+1] = TypeInfo< Real >::getMaxValue();
		}
		__syncthreads();


		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
		{
		sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ];
		__syncthreads();
		}

		int loopcounter;
		loopcounter = 0;
		__syncthreads();

		/*if( ( blIdx == 4 && blIdy == 5 ) \|\| ( blIdx == 2 && blIdy == 2 ) \|\| ( blIdx == 2 && blIdy == 3 ) \|\|
		( blIdx == 5 && blIdy == 5 ) \|\| ( blIdx == 6 && blIdy == 5 ) \|\| ( blIdx == 5 && blIdy == 4 ) \|\|
		( blIdx == 5 && blIdy == 2 ) \|\| ( blIdx == 3 && blIdy == 2 ) \|\| ( blIdx == 4 && blIdy == 2 ) )*/
		while( changed[ 0 ] > 0.1 )
		while( changed[ 0 ] )
		{
		changed[ currentIndex] = 0.0;
		__syncthreads();

		changed[ currentIndex] = false;

		//calculation of update cell
		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
		{
		if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] )
		{
		changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx, hy );
		__syncthreads();
		}
		}
		__syncthreads();

		/*if( thri == 0 && thrj == 0 && blIdx == 1 && blIdy == 3 ){
		for( int h = 15; h > -1; h-- ){
		for( int g = 0; g < 16; g++ ){
		printf( "%d\t", changed[h*16+g] );
		}
		printf("\n");
		}
		printf("\n");
		}
		__syncthreads();*/

		//pyramid reduction
		if( blockDim.x*blockDim.y >= 256 )
		{
		if( currentIndex < 128 )
		{
		changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 128 ];
		changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 128 ];
		}
		}
		__syncthreads();
		@@ -426,52 +408,25 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		{
		if( currentIndex < 64 )
		{
		changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 64 ];
		changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 64 ];
		}
		}
		__syncthreads();
		if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU
		{
		if( true ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 32 ];
		if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 16 ];
		if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 8 ];
		if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 4 ];
		if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 2 ];
		if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 1 ];
		if( true ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 32 ];
		if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 16 ];
		if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 8 ];
		if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 4 ];
		if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 2 ];
		if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 1 ];
		}
		__syncthreads();



		loopcounter++;
		__syncthreads();
		if( currentIndex != 0 )
		changed[ currentIndex ] = 0.0;
		__syncthreads();

		if( loopcounter > 200 )
		changed[ currentIndex ] = 0.0;
		if( changed[ 0 ] && thri == 0 && thrj == 0 )
		BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ];
		__syncthreads();
		}
		/*__syncthreads();
		if( blIdy == 3 && blIdx == 1 )
		{
		BlockIterDevice[ currentIndex ] = changed[ currentIndex ];
		BlockIterDevice[ 0 ] = loopcounter;
		}*/
		__syncthreads();
		if( blIdx == 2 && blIdy == 3 )
		{
		//SmallCycleBoolin = changed[ 0 ];
		//if( changed[ 0 ] )
		BlockIterDevice[ currentIndex ] = changed[ currentIndex ];// loopcounter;
		BlockIterDevice[ 0 ] = loopcounter;

		}


		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) )
		aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ];
		__syncthreads();
		}
		#endif