Implementation of 1D solver is done for both CPU and GPU. 2D solver is now working fine (c822988e) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+16 −7

Original line number	Diff line number	Diff line
		@@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >

		template< typename MeshEntity >
		__cuda_callable__ void updateCell( MeshFunctionType& u,
		const MeshEntity& cell );
		const MeshEntity& cell,
		const RealType velocity = 1.0 );

		__cuda_callable__ bool updateCell( volatile Real sArray[18],
		int thri, const Real h,
		const Real velocity = 1.0 );
		};


		@@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
		__cuda_callable__ bool updateCell( volatile Real sArray[18][18],
		int thri, int thrj, const Real hx, const Real hy,
		const Real velocity = 1.0 );

		__cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18],
		int dimX, int dimY, int i, int j );

		/*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18],
		int dimX, int dimY, int i, int j );*/
		};

		template< typename Real,
		@@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] );


		#ifdef HAVE_CUDA
		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap );

		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
		bool *BlockIterDevice );

		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+183 −95

Original line number	Diff line number	Diff line
		@@ -20,6 +20,25 @@ tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
		initInterface( const MeshFunctionPointer& _input,
		MeshFunctionPointer& _output,
		InterfaceMapPointer& _interfaceMap )
		{
		if( std::is_same< Device, Devices::Cuda >::value )
		{
		#ifdef HAVE_CUDA
		const MeshType& mesh = _input->getMesh();

		const int cudaBlockSize( 16 );
		int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
		dim3 blockSize( cudaBlockSize );
		dim3 gridSize( numBlocksX );
		Devices::Cuda::synchronizeDevice();
		CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(),
		_output.template modifyData< Device >(),
		_interfaceMap.template modifyData< Device >() );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		#endif
		}
		if( std::is_same< Device, Devices::Host >::value )
		{
		const MeshType& mesh = _input->getMesh();
		typedef typename MeshType::Cell Cell;
		@@ -27,7 +46,20 @@ initInterface( const MeshFunctionPointer& _input,
		MeshFunctionType& output = _output.modifyData();
		InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
		Cell cell( mesh );
		for( cell.getCoordinates().x() = 1;
		for( cell.getCoordinates().x() = 0;
		cell.getCoordinates().x() < mesh.getDimensions().x();
		cell.getCoordinates().x() ++ )
		{
		cell.refresh();
		output[ cell.getIndex() ] =
		input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() :
		- TypeInfo< RealType >::getMaxValue();
		interfaceMap[ cell.getIndex() ] = false;
		}


		const RealType& h = mesh.getSpaceSteps().x();
		for( cell.getCoordinates().x() = 0;
		cell.getCoordinates().x() < mesh.getDimensions().x() - 1;
		cell.getCoordinates().x() ++ )
		{
		@@ -36,29 +68,24 @@ initInterface( const MeshFunctionPointer& _input,
		if( ! cell.isBoundaryEntity() )
		{
		const auto& neighbors = cell.getNeighborEntities();
		const RealType& h = mesh.getSpaceSteps().x();
		Real pom = 0;
		//const IndexType& c = cell.getIndex();
		const IndexType e = neighbors.template getEntityIndex< 1 >();
		const IndexType w = neighbors.template getEntityIndex< -1 >();
		if( c * input[ e ] <= 0 )
		{
		output[ cell.getIndex() ] =
		c >= 0 ? ( h * c )/( c - input[ e ] ) :
		- ( h * c )/( c - input[ e ] );
		pom = TNL::sign( c )( h c )/( c - input[ e ]);
		if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) )
		output[ cell.getIndex() ] = pom;

		pom = pom - TNL::sign( c )h; //output[ e ] = (hx c)/( c - input[ e ]) - hx;
		if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
		output[ e ] = pom;

		interfaceMap[ cell.getIndex() ] = true;
		interfaceMap[ e ] = true;
		}
		if( c * input[ w ] <=0 )
		{
		output[ cell.getIndex() ] =
		c >= 0 ? ( h * c )/( c - input[ w ] ) :
		- ( h * c )/( c - input[ w ] );
		interfaceMap[ cell.getIndex() ] = true;
		}
		}
		output[ cell.getIndex() ] =
		c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() :
		-TypeInfo< RealType >::getMaxValue();
		interfaceMap[ cell.getIndex() ] = false;
		}
		}

		@@ -69,8 +96,31 @@ template< typename Real,
		void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
		updateCell( MeshFunctionType& u,
		const MeshEntity& cell )
		const MeshEntity& cell,
		const RealType v )
		{
		const auto& neighborEntities = cell.template getNeighborEntities< 1 >();
		const MeshType& mesh = cell.getMesh();
		const RealType& h = mesh.getSpaceSteps().x();
		const RealType value = u( cell );
		RealType a, tmp = TypeInfo< RealType >::getMaxValue();

		if( cell.getCoordinates().x() == 0 )
		a = u[ neighborEntities.template getEntityIndex< 1 >() ];
		else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 )
		a = u[ neighborEntities.template getEntityIndex< -1 >() ];
		else
		{
		a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ],
		u[ neighborEntities.template getEntityIndex< 1 >() ] );
		}

		if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
		return;

		tmp = a + TNL::sign( value ) * h/v;

		u[ cell.getIndex() ] = argAbsMin( value, tmp );
		}

		template< typename Real,
		@@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input,
		}
		if( std::is_same< Device, Devices::Host >::value )
		{
		const MeshFunctionType& input = _input.getData();
		MeshFunctionType input = _input.getData();

		double A[320][320];
		std::ifstream fileInit("/home/maty/Downloads/initData.txt");

		for (int i = 0; i < 320; i++)
		for (int j = 0; j < 320; j++)
		fileInit >> A[i][j];
		fileInit.close();
		for (int i = 0; i < 320; i++)
		for (int j = 0; j < 320; j++)
		input[i*320 + j] = A[i][j];


		MeshFunctionType& output = _output.modifyData();
		InterfaceMapType& interfaceMap = _interfaceMap.modifyData();
		const MeshType& mesh = input.getMesh();
		@@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] )
		}
		}

		template< typename Real,
		typename Device,
		typename Index >
		__cuda_callable__ void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
		setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy )


		#ifdef HAVE_CUDA
		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap )
		{
		int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0);
		int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0);
		int xkolik = 18;
		int ykolik = 18;
		int xOd = 0;
		int yOd = 0;
		int i = threadIdx.x + blockDim.x*blockIdx.x;
		const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >();

		if( blockIdx == 0 )
		xOd = 1;
		if( blockIdy == 0 )
		yOd = 1;
		if( i < mesh.getDimensions().x() )
		{
		typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell;
		Cell cell( mesh );
		cell.getCoordinates().x() = i;
		cell.refresh();
		const Index cind = cell.getIndex();

		if( numOfBlockx - 1 == blockIdx )
		xkolik = dimX - (numOfBlockx-1)*16+1;

		if( numOfBlocky -1 == blockIdy )
		ykolik = dimY - (numOfBlocky-1)*16+1;
		output[ cind ] =
		input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() :
		- TypeInfo< Real >::getMaxValue();
		interfaceMap[ cind ] = false;

		if( dimX > (blockIdx+1) * 16 )
		{
		for( int i = yOd; i < ykolik; i++ )
		{
		sArray[i][17] = aux[ blockIdy16dimX - dimX + blockIdx16 - 1 + idimX + 17 ];
		}
		}
		if( blockIdx != 0 )
		{
		for( int i = yOd; i < ykolik; i++ )
		{
		sArray[i][0] = aux[ blockIdy16dimX - dimX + blockIdx16 - 1 + idimX + 0];
		}
		}
		if( dimY > (blockIdy+1) * 16 )
		const Real& h = mesh.getSpaceSteps().x();
		cell.refresh();
		const Real& c = input( cell );
		if( ! cell.isBoundaryEntity() )
		{
		for( int i = xOd; i < xkolik; i++ )
		auto neighbors = cell.getNeighborEntities();
		Real pom = 0;
		const Index e = neighbors.template getEntityIndex< 1 >();
		const Index w = neighbors.template getEntityIndex< -1 >();
		if( c * input[ e ] <= 0 )
		{
		sArray[17][i] = aux[ blockIdy16dimX - dimX + blockIdx16 - 1 + 17dimX + i ];
		}
		pom = TNL::sign( c )( h c )/( c - input[ e ]);
		if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) )
		output[ cind ] = pom;

		interfaceMap[ cind ] = true;
		}
		if( blockIdy != 0 )
		{
		for( int i = xOd; i < xkolik; i++ )
		if( c * input[ w ] <= 0 )
		{
		sArray[0][i] = aux[ blockIdy16dimX - dimX + blockIdx16 - 1 + 0dimX + i ];
		pom = TNL::sign( c )( h c )/( c - input[ w ]);
		if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) )
		output[ cind ] = pom;

		interfaceMap[ cind ] = true;
		}
		}
		}



		#ifdef HAVE_CUDA
		}
		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
		@@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		{
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.001 )
		if ( fabs( tmp ) > 0.01*hx )
		return true;
		else
		return false;
		@@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con
		( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] );
		sArray[ thrj ][ thri ] = argAbsMin( value, tmp );
		tmp = value - sArray[ thrj ][ thri ];
		if ( fabs( tmp ) > 0.01 )
		if ( fabs( tmp ) > 0.01*hx )
		return true;
		else
		return false;
		@@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con

		return false;
		}

		template< typename Real,
		typename Device,
		typename Index >
		__cuda_callable__
		bool
		tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
		updateCell( volatile Real sArray[18], int thri, const Real h, const Real v )
		{
		const RealType value = sArray[ thri ];
		RealType a, tmp = TypeInfo< RealType >::getMaxValue();

		a = TNL::argAbsMin( sArray[ thri+1 ],
		sArray[ thri-1 ] );

		if( fabs( a ) == TypeInfo< RealType >::getMaxValue() )
		return false;

		tmp = a + TNL::sign( value ) * h/v;


		sArray[ thri ] = argAbsMin( value, tmp );

		tmp = value - sArray[ thri ];
		if ( fabs( tmp ) > 0.01*h )
		return true;
		else
		return false;
		}
		#endif

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h

+184 −7

Original line number	Diff line number	Diff line
		@@ -58,13 +58,190 @@ solve( const MeshPointer& mesh,
		const AnisotropyPointer& anisotropy,
		MeshFunctionPointer& u )
		{
		MeshFunctionPointer aux;
		InterfaceMapPointer interfaceMap;
		aux->setMesh( mesh );
		interfaceMap->setMesh( mesh );
		MeshFunctionPointer auxPtr;
		InterfaceMapPointer interfaceMapPtr;
		auxPtr->setMesh( mesh );
		interfaceMapPtr->setMesh( mesh );
		std::cout << "Initiating the interface cells ..." << std::endl;
		BaseType::initInterface( u, aux, interfaceMap );
		aux->save( "aux-ini.tnl" );
		BaseType::initInterface( u, auxPtr, interfaceMapPtr );

		auxPtr->save( "aux-ini.tnl" );

		typename MeshType::Cell cell( *mesh );

		IndexType iteration( 0 );
		InterfaceMapType interfaceMap = *interfaceMapPtr;
		MeshFunctionType aux = *auxPtr;
		while( iteration < this->maxIterations )
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		{
		for( cell.getCoordinates().x() = 0;
		cell.getCoordinates().x() < mesh->getDimensions().x();
		cell.getCoordinates().x()++ )
		{
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		}


		for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
		cell.getCoordinates().x() >= 0 ;
		cell.getCoordinates().x()-- )
		{
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		}
		}
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		// TODO: CUDA code
		#ifdef HAVE_CUDA
		const int cudaBlockSize( 16 );
		int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
		dim3 blockSize( cudaBlockSize );
		dim3 gridSize( numBlocksX );

		tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr;



		bool* BlockIter = (bool)malloc( ( numBlocksX ) sizeof( bool ) );

		bool *BlockIterDevice;
		cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) );

		while( BlockIter[ 0 ] )
		{
		for( int i = 0; i < numBlocksX; i++ )
		BlockIter[ i ] = false;
		cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice);

		CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr,
		interfaceMapPtr.template getData< Device >(),
		auxPtr.template modifyData< Device>(),
		BlockIterDevice );
		cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost);

		for( int i = 1; i < numBlocksX; i++ )
		BlockIter[ 0 ] = BlockIter[ 0 ] \|\| BlockIter[ i ];

		}
		delete[] BlockIter;
		cudaFree( BlockIterDevice );
		cudaDeviceSynchronize();

		TNL_CHECK_CUDA_DEVICE;

		aux = *auxPtr;
		interfaceMap = *interfaceMapPtr;
		#endif
		}
		iteration++;
		}

		aux.save("aux-final.tnl");
		}

		#ifdef HAVE_CUDA
		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr,
		const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap,
		Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux,
		bool *BlockIterDevice )
		{
		int thri = threadIdx.x;
		int blIdx = blockIdx.x;
		int i = thri + blockDim.x*blIdx;

		__shared__ volatile bool changed[16];
		changed[ thri ] = false;

		if( thri == 0 )
		changed[ 0 ] = true;

		const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
		__shared__ Real h;
		if( thri == 1 )
		{
		h = mesh.getSpaceSteps().x();
		}

		__shared__ volatile Real sArray[ 18 ];
		sArray[thri] = TypeInfo< Real >::getMaxValue();

		//filling sArray edges
		int dimX = mesh.getDimensions().x();
		__shared__ volatile int numOfBlockx;
		__shared__ int xkolik;
		if( thri == 0 )
		{
		xkolik = blockDim.x + 1;
		numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0);

		if( numOfBlockx - 1 == blIdx )
		xkolik = dimX - (blIdx)*blockDim.x+1;
		}
		__syncthreads();

		if( thri == 0 )
		{
		if( dimX > (blIdx+1) * blockDim.x )
		sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ];
		else
		sArray[xkolik] = TypeInfo< Real >::getMaxValue();
		}

		if( thri == 1 )
		{
		if( blIdx != 0 )
		sArray[0] = aux[ blIdx*blockDim.x - 1 ];
		else
		sArray[0] = TypeInfo< Real >::getMaxValue();
		}


		if( i < mesh.getDimensions().x() )
		{
		sArray[thri+1] = aux[ i ];
		}
		__syncthreads();

		while( changed[ 0 ] )
		{
		__syncthreads();

		changed[ thri ] = false;

		//calculation of update cell
		if( i < mesh.getDimensions().x() )
		{
		if( ! interfaceMap[ i ] )
		{
		changed[ thri ] = ptr.updateCell( sArray, thri+1, h );
		}
		}
		__syncthreads();



		//pyramid reduction
		if( thri < 8 ) changed[ thri ] = changed[ thri ] \|\| changed[ thri + 8 ];
		if( thri < 4 ) changed[ thri ] = changed[ thri ] \|\| changed[ thri + 4 ];
		if( thri < 2 ) changed[ thri ] = changed[ thri ] \|\| changed[ thri + 2 ];
		if( thri < 1 ) changed[ thri ] = changed[ thri ] \|\| changed[ thri + 1 ];
		__syncthreads();

		if( changed[ 0 ] && thri == 0 )
		BlockIterDevice[ blIdx ] = true;
		__syncthreads();
		}

		if( i < mesh.getDimensions().x() && (!interfaceMap[ i ]) )
		aux[ i ] = sArray[ thri + 1 ];
		}
		#endif

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+36 −31

Original line number	Diff line number	Diff line
		@@ -18,13 +18,16 @@
		#include <TNL/Devices/Cuda.h>


		#include <iostream>
		#include <fstream>

		template< typename Real,
		typename Device,
		typename Index,
		typename Anisotropy >
		FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >::
		FastSweepingMethod()
		: maxIterations( 1 )
		: maxIterations( 2 )
		{

		}
		@@ -61,15 +64,31 @@ solve( const MeshPointer& mesh,
		const AnisotropyPointer& anisotropy,
		MeshFunctionPointer& u )
		{
		MeshFunctionType v;
		v.setMesh(mesh);
		double A[320][320];
		for (int i = 0; i < 320; i++)
		for (int j = 0; j < 320; j++)
		A[i][j] = 0;

		std::ifstream file("/home/maty/Downloads/mapa2.txt");

		for (int i = 0; i < 320; i++)
		for (int j = 0; j < 320; j++)
		file >> A[i][j];
		file.close();
		for (int i = 0; i < 320; i++)
		for (int j = 0; j < 320; j++)
		v[i*320 + j] = A[i][j];
		v.save("mapa.tnl");


		MeshFunctionPointer auxPtr;
		InterfaceMapPointer interfaceMapPtr;
		auxPtr->setMesh( mesh );
		interfaceMapPtr->setMesh( mesh );
		std::cout << "Initiating the interface cells ..." << std::endl;
		BaseType::initInterface( u, auxPtr, interfaceMapPtr );
		#ifdef HAVE_CUDA
		cudaDeviceSynchronize();
		#endif

		auxPtr->save( "aux-ini.tnl" );

		@@ -92,7 +111,7 @@ solve( const MeshPointer& mesh,
		{
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		this->updateCell( aux, cell, v( cell ) );
		}
		}

		@@ -109,7 +128,7 @@ solve( const MeshPointer& mesh,
		//std::cerr << "2 -> ";
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		this->updateCell( aux, cell, v( cell ) );
		}
		}

		@@ -126,7 +145,7 @@ solve( const MeshPointer& mesh,
		//std::cerr << "3 -> ";
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		this->updateCell( aux, cell, v( cell ) );
		}
		}

		@@ -143,7 +162,7 @@ solve( const MeshPointer& mesh,
		//std::cerr << "4 -> ";
		cell.refresh();
		if( ! interfaceMap( cell ) )
		this->updateCell( aux, cell );
		this->updateCell( aux, cell, v( cell ) );
		}
		}

		@@ -222,7 +241,6 @@ solve( const MeshPointer& mesh,
		int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
		dim3 blockSize( cudaBlockSize, cudaBlockSize );
		dim3 gridSize( numBlocksX, numBlocksY );
		Devices::Cuda::synchronizeDevice();

		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;

		@@ -243,20 +261,8 @@ solve( const MeshPointer& mesh,
		interfaceMapPtr.template getData< Device >(),
		auxPtr.template modifyData< Device>(),
		BlockIterDevice );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost);

		/*for( int j = numBlocksY-1; j > -1; j-- )
		{
		for( int i = 0; i < numBlocksX; i++ )
		{
		std::cout << BlockIter[ j * numBlocksY + i ] << "\t";
		}
		std::cout << std::endl;
		}
		std::cout << std::endl;*/

		for( int i = 1; i < numBlocksX * numBlocksY; i++ )
		BlockIter[ 0 ] = BlockIter[ 0 ] \|\| BlockIter[ i ];

		@@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<

		if( thrj == 0 && thri == 0 )
		changed[ 0 ] = true;
		__syncthreads();

		const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
		__shared__ Real hx;
		__shared__ Real hy;
		if( thrj == 0 && thri == 0 )
		if( thrj == 1 && thri == 1 )
		{
		hx = mesh.getSpaceSteps().x();
		hy = mesh.getSpaceSteps().y();
		@@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue();
		}

		if( thrj == 0 )
		if( thri == 2 )
		{
		if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik )
		sArray[ykolik][thri+1] = aux[ blIdyblockDim.ydimX - dimX + blIdxblockDim.x - 1 + ykolikdimX + thri+1 ];
		sArray[ykolik][thrj+1] = aux[ blIdyblockDim.ydimX - dimX + blIdxblockDim.x - 1 + ykolikdimX + thrj+1 ];
		else
		sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue();
		sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue();
		}

		if( thrj == 1 )
		if( thri == 3 )
		{
		if( blIdy != 0 && thri+1 < xkolik )
		sArray[0][thri+1] = aux[ blIdyblockDim.ydimX - dimX + blIdx*blockDim.x - 1 + thri+1 ];
		sArray[0][thrj+1] = aux[ blIdyblockDim.ydimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ];
		else
		sArray[0][thri+1] = TypeInfo< Real >::getMaxValue();
		sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue();
		}


		@@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] \|\| changed[ currentIndex + 1 ];
		}
		if( changed[ 0 ] && thri == 0 && thrj == 0 )
		BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ];
		BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true;
		__syncthreads();
		}

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h

+1 −1

File changed.

Contains only whitespace changes.