CUDA init problem not solved (db36cd42) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+6 −2

Original line number	Diff line number	Diff line
		@@ -104,13 +104,17 @@ template < typename T1 >
		__cuda_callable__ void sortMinims( T1 pom[] );


		#ifdef HAVE_CUDA
		// Declaration of the CUDA kernel template CudaUpdateCellCaller.
		// It is only visible when HAVE_CUDA is defined (see the enclosing #ifdef).
		//
		// Parameters:
		//   interfaceMap - MeshFunction on a 2D Grid instantiated with `2, bool`,
		//                  taken by non-const reference.
		//   aux          - MeshFunction on the same 2D Grid type, taken by
		//                  non-const reference.
		//
		// NOTE(review): both parameters are C++ references on a __global__ function.
		// A reference to an object that lives in host memory is not dereferenceable
		// on the device - confirm the callers pass device-accessible objects.
		template < typename Real, typename Device, typename Index >
		__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux );

		template < typename Real, typename Device, typename Index >
		/*template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );*/

		// Declaration of the non-template CUDA kernel CudaInitCaller, fixed to a
		// MeshFunction over Meshes::Grid< 2, double, TNL::Devices::Cuda, int >.
		// It takes only the `input` mesh function, by const reference.
		//
		// NOTE(review): `input` is a C++ reference on a __global__ function; if the
		// referenced object lives in host memory the kernel must not dereference it.
		// Confirm how the caller provides this argument.
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input );
		#endif

		#include "tnlDirectEikonalMethodsBase_impl.h"

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+21 −7

Original line number	Diff line number	Diff line
		@@ -83,25 +83,30 @@ initInterface( const MeshFunctionType& input,
		* check the device type with is_same
		* one CUDA thread per grid point
		*/
		const MeshType& mesh = input.getMesh();
		typedef typename MeshType::Cell Cell;
		Cell cell( mesh );


		if( std::is_same< Device, Devices::Cuda >::value )
		{
		#ifdef HAVE_CUDA
		const MeshType& mesh = input.getMesh();

		const int cudaBlockSize( 16 );
		int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
		int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
		dim3 blockSize( cudaBlockSize, cudaBlockSize );
		dim3 gridSize( numBlocksX, numBlocksY );
		Devices::Cuda::synchronizeDevice();
		CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
		//CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
		CudaInitCaller<<< gridSize, blockSize >>>( input );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		#endif
		}
		if( std::is_same< Device, Devices::Host >::value )
		{
		const MeshType& mesh = input.getMesh();
		typedef typename MeshType::Cell Cell;
		Cell cell( mesh );
		for( cell.getCoordinates().y() = 0;
		cell.getCoordinates().y() < mesh.getDimensions().y();
		cell.getCoordinates().y() ++ )
		@@ -597,7 +602,7 @@ __cuda_callable__ void sortMinims( T1 pom[])
		}
		}

		template < typename Real, typename Device, typename Index >
		/*template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap )
		@@ -606,7 +611,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
		int j = blockDim.y*blockIdx.y + threadIdx.y;
		const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh();

		if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
		//if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
		{
		typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
		Cell cell( mesh );
		@@ -666,4 +671,13 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
		}
		}
		}
		}*/


		/**
		 * CUDA kernel CudaInitCaller for a MeshFunction over
		 * Meshes::Grid< 2, double, TNL::Devices::Cuda, int >.
		 *
		 * In its current form the kernel only derives the calling thread's global
		 * x/y indices from the launch configuration; `input` is never read and
		 * nothing is written.
		 *
		 * NOTE(review): `input` is a reference parameter of a __global__ function.
		 * If the mesh lookup below is re-enabled, confirm that the referenced
		 * object is accessible from device code.
		 *
		 * \param input mesh function (currently unused by the body)
		 */
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input )
		{
		   // Global thread coordinates inside the 2D launch grid.
		   const int column = blockIdx.x * blockDim.x + threadIdx.x;
		   const int row = threadIdx.y + blockIdx.y * blockDim.y;

		   // Mesh lookup is disabled for now:
		   //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh();
		}

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+7 −13

Original line number	Diff line number	Diff line
		@@ -64,17 +64,9 @@ solve( const MeshPointer& mesh,
		interfaceMap.setMesh( mesh );
		std::cout << "Initiating the interface cells ..." << std::endl;
		BaseType::initInterface( u, aux, interfaceMap );
		cudaDeviceSynchronize();

		//if( std::is_same< DeviceType, Devices::Cuda >::value )
		//{
		// Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux;
		//cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost );
		//h_aux->save("aux-init-cuda.tnl");
		//}
		//if( std::is_same< DeviceType, Devices::Host >::value )
		{
		aux.save( "aux-ini.tnl" );
		}

		typename MeshType::Cell cell( *mesh );

		@@ -217,7 +209,8 @@ solve( const MeshPointer& mesh,
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		// TODO: CUDA code
		int numBlocks = 2;
		#ifdef HAVE_CUDA
		/*int numBlocks = 2;
		int threadsPerBlock;
		if( mesh->getDimensions().x() >= mesh->getDimensions().y() )
		threadsPerBlock = (int)( mesh->getDimensions().x() );
		@@ -225,7 +218,8 @@ solve( const MeshPointer& mesh,
		threadsPerBlock = (int)( mesh->getDimensions().y() );

		CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux );
		cudaDeviceSynchronize(); //copak dela?
		cudaDeviceSynchronize(); //copak dela?*/
		#endif
		}
		iteration++;
		}