Fixed array binding in the Hamilton-Jacobi solver - using ArrayView instead of Array (84af6166) · Commits · TNL / tnl-dev

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h

+17 −17

Original line number	Diff line number	Diff line
		@@ -80,12 +80,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
		const Real velocity = 1.0 );

		template< int sizeSArray >
		void updateBlocks( InterfaceMapType interfaceMap,
		MeshFunctionType aux,
		MeshFunctionType helpFunc,
		ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ );
		void updateBlocks( const InterfaceMapType& interfaceMap,
		MeshFunctionType& aux,
		MeshFunctionType& helpFunc,
		ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ );

		void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY );
		void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY );
		};

		template< typename Real,
		@@ -114,12 +114,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
		const RealType velocity = 1.0);

		template< int sizeSArray >
		void updateBlocks( const InterfaceMapType interfaceMap,
		const MeshFunctionType aux,
		void updateBlocks( const InterfaceMapType& interfaceMap,
		const MeshFunctionType& aux,
		MeshFunctionType& helpFunc,
		ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ );
		ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ );

		void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ );
		void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ );

		template< int sizeSArray >
		__cuda_callable__ bool updateCell3D( volatile Real *sArray,
		@@ -147,15 +147,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0);
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0);

		template < typename Index >
		__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks );
		__global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks );

		template < typename Index >
		__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY );
		__global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY );

		template < typename Real, typename Device, typename Index >
		__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
		@@ -172,11 +172,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
		const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
		Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice );
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice );

		template < typename Index >
		__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,
		__global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom,
		int numBlockX, int numBlockY, int numBlockZ );
		#endif

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h

+9 −9

Original line number	Diff line number	Diff line
		@@ -96,10 +96,10 @@ template< typename Real,
		template< int sizeSArray >
		void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
		updateBlocks( InterfaceMapType interfaceMap,
		MeshFunctionType aux,
		MeshFunctionType helpFunc,
		ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ )
		updateBlocks( const InterfaceMapType& interfaceMap,
		MeshFunctionType& aux,
		MeshFunctionType& helpFunc,
		ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ )
		{
		#pragma omp parallel for schedule( dynamic )
		for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
		@@ -270,10 +270,10 @@ template< typename Real,
		template< int sizeSArray >
		void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
		updateBlocks( const InterfaceMapType interfaceMap,
		const MeshFunctionType aux,
		updateBlocks( const InterfaceMapType& interfaceMap,
		const MeshFunctionType& aux,
		MeshFunctionType& helpFunc,
		ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ )
		ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real sArray*/ )
		{
		//#pragma omp parallel for schedule( dynamic )
		for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
		@@ -594,7 +594,7 @@ template< typename Real,
		typename Index >
		void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
		getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ )
		getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ )
		{
		int* BlockIterPom;
		BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ];
		@@ -634,7 +634,7 @@ template< typename Real,
		typename Index >
		void
		tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
		getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY )
		getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY )
		{
		int* BlockIterPom;
		BlockIterPom = new int [numBlockX * numBlockY];

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h

+13 −13

Original line number	Diff line number	Diff line
		@@ -378,7 +378,7 @@ solve( const MeshPointer& mesh,
		auxPtr.template getData< Device>(),
		helpFunc.template modifyData< Device>(),
		BlockIterDevice,
		oddEvenBlock );
		oddEvenBlock.getView() );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		auxPtr = helpFunc;
		@@ -390,17 +390,17 @@ solve( const MeshPointer& mesh,
		auxPtr.template getData< Device>(),
		helpFunc.template modifyData< Device>(),
		BlockIterDevice,
		oddEvenBlock );
		oddEvenBlock.getView() );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		auxPtr = helpFunc;

		oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;

		CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
		CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
		CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;

		@@ -422,7 +422,7 @@ solve( const MeshPointer& mesh,
		interfaceMapPtr.template getData< Device >(),
		auxPtr.template modifyData< Device>(),
		helpFunc.template modifyData< Device>(),
		BlockIterDevice );
		BlockIterDevice.getView() );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;

		@@ -437,7 +437,7 @@ solve( const MeshPointer& mesh,
		//}
		//std::cout << std::endl;

		GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY );
		GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		BlockIterDevice = BlockIterPom;
		@@ -447,10 +447,10 @@ solve( const MeshPointer& mesh,
		//TNL::swap( auxPtr, helpFunc );


		CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
		CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) );
		TNL_CHECK_CUDA_DEVICE;

		CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
		CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
		TNL_CHECK_CUDA_DEVICE;


		@@ -489,8 +489,8 @@ solve( const MeshPointer& mesh,


		template < typename Index >
		__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY )
		__global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY )
		{
		int i = blockIdx.x * 1024 + threadIdx.x;

		@@ -515,8 +515,8 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index
		}

		template < typename Index >
		__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks )
		__global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks )
		{
		int i = threadIdx.x;
		int blId = blockIdx.x;
		@@ -588,7 +588,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
		const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
		Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock )
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock )
		{
		int thri = threadIdx.x; int thrj = threadIdx.y;
		int i = threadIdx.x + blockDim.x*blockIdx.x;

src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h

+7 −7

Original line number	Diff line number	Diff line
		@@ -383,20 +383,20 @@ solve( const MeshPointer& mesh,
		interfaceMapPtr.template getData< Device >(),
		auxPtr.template getData< Device>(),
		helpFunc.template modifyData< Device>(),
		BlockIterDevice );
		BlockIterDevice.getView() );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;

		GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ );
		GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		BlockIterDevice = BlockIterPom;

		CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) );
		CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY * numBlocksZ ) );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;

		CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
		CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
		cudaDeviceSynchronize();
		TNL_CHECK_CUDA_DEVICE;
		cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost);
		@@ -426,8 +426,8 @@ solve( const MeshPointer& mesh,

		#ifdef HAVE_CUDA
		template < typename Index >
		__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,
		__global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom,
		int numBlockX, int numBlockY, int numBlockZ )
		{
		int i = blockIdx.x * 1024 + threadIdx.x;
		@@ -462,7 +462,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
		const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
		const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
		Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc,
		TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice )
		TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice )
		{
		int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z;
		int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z;