Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +17 −17 Original line number Diff line number Diff line Loading @@ -80,12 +80,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const Real velocity = 1.0 ); template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void updateBlocks( const InterfaceMapType& interfaceMap, MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY ); }; template< typename Real, Loading Loading @@ -114,12 +114,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > const RealType velocity = 1.0); template< int sizeSArray > void updateBlocks( const InterfaceMapType interfaceMap, const MeshFunctionType aux, void updateBlocks( const InterfaceMapType& interfaceMap, const MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); template< int sizeSArray > __cuda_callable__ bool updateCell3D( volatile Real *sArray, Loading Loading @@ -147,15 +147,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); __global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks ); template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); __global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Loading @@ -172,11 +172,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice ); template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, __global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY, int numBlockZ ); #endif Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +9 −9 Original line number Diff line number Diff line Loading @@ -96,10 +96,10 @@ template< typename Real, template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) updateBlocks( const InterfaceMapType& interfaceMap, MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { #pragma omp parallel for schedule( dynamic ) for( IndexType i = 0; i < BlockIterHost.getSize(); i++ ) Loading Loading @@ -270,10 +270,10 @@ template< typename Real, template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateBlocks( const InterfaceMapType interfaceMap, const MeshFunctionType aux, updateBlocks( const InterfaceMapType& interfaceMap, const MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { //#pragma omp parallel for schedule( dynamic ) for( IndexType i = 0; i < BlockIterHost.getSize(); i++ ) Loading Loading @@ -594,7 +594,7 @@ template< typename Real, typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) { int* BlockIterPom; BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ]; Loading Loading @@ -634,7 +634,7 @@ template< typename Real, typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY ) { int* BlockIterPom; BlockIterPom = new int [numBlockX * numBlockY]; Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +13 −13 Original line number Diff line number Diff line Loading @@ -378,7 +378,7 @@ solve( const MeshPointer& mesh, auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); oddEvenBlock.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; auxPtr = helpFunc; Loading @@ -390,17 +390,17 @@ solve( const MeshPointer& mesh, auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); oddEvenBlock.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; auxPtr = helpFunc; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; Loading @@ -422,7 +422,7 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice ); BlockIterDevice.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; Loading @@ -437,7 +437,7 @@ solve( const MeshPointer& mesh, //} //std::cout << std::endl; GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY ); GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; Loading @@ -447,10 +447,10 @@ solve( const MeshPointer& mesh, //TNL::swap( auxPtr, helpFunc ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) ); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -489,8 +489,8 @@ solve( const MeshPointer& mesh, template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) __global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; Loading @@ -515,8 +515,8 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index } template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) __global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks ) { int i = threadIdx.x; int blId = blockIdx.x; Loading Loading @@ -588,7 +588,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int i = threadIdx.x + blockDim.x*blockIdx.x; Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +7 −7 Original line number Diff line number Diff line Loading @@ -383,20 +383,20 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice ); BlockIterDevice.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ ); GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY * numBlocksZ ) ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); Loading Loading @@ -426,8 +426,8 @@ solve( const MeshPointer& mesh, #ifdef HAVE_CUDA template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, __global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY, int numBlockZ ) { int i = blockIdx.x * 1024 + threadIdx.x; Loading Loading @@ -462,7 +462,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; Loading Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +17 −17 Original line number Diff line number Diff line Loading @@ -80,12 +80,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const Real velocity = 1.0 ); template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void updateBlocks( const InterfaceMapType& interfaceMap, MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY ); }; template< typename Real, Loading Loading @@ -114,12 +114,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > const RealType velocity = 1.0); template< int sizeSArray > void updateBlocks( const InterfaceMapType interfaceMap, const MeshFunctionType aux, void updateBlocks( const InterfaceMapType& interfaceMap, const MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); template< int sizeSArray > __cuda_callable__ bool updateCell3D( volatile Real *sArray, Loading Loading @@ -147,15 +147,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); __global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks ); template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); __global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Loading @@ -172,11 +172,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice ); template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, __global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY, int numBlockZ ); #endif Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +9 −9 Original line number Diff line number Diff line Loading @@ -96,10 +96,10 @@ template< typename Real, template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) updateBlocks( const InterfaceMapType& interfaceMap, MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { #pragma omp parallel for schedule( dynamic ) for( IndexType i = 0; i < BlockIterHost.getSize(); i++ ) Loading Loading @@ -270,10 +270,10 @@ template< typename Real, template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateBlocks( const InterfaceMapType interfaceMap, const MeshFunctionType aux, updateBlocks( const InterfaceMapType& interfaceMap, const MeshFunctionType& aux, MeshFunctionType& helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { //#pragma omp parallel for schedule( dynamic ) for( IndexType i = 0; i < BlockIterHost.getSize(); i++ ) Loading Loading @@ -594,7 +594,7 @@ template< typename Real, typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) { int* BlockIterPom; BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ]; Loading Loading @@ -634,7 +634,7 @@ template< typename Real, typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY ) { int* BlockIterPom; BlockIterPom = new int [numBlockX * numBlockY]; Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +13 −13 Original line number Diff line number Diff line Loading @@ -378,7 +378,7 @@ solve( const MeshPointer& mesh, auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); oddEvenBlock.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; auxPtr = helpFunc; Loading @@ -390,17 +390,17 @@ solve( const MeshPointer& mesh, auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); oddEvenBlock.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; auxPtr = helpFunc; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; Loading @@ -422,7 +422,7 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice ); BlockIterDevice.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; Loading @@ -437,7 +437,7 @@ solve( const MeshPointer& mesh, //} //std::cout << std::endl; GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY ); GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; Loading @@ -447,10 +447,10 @@ solve( const MeshPointer& mesh, //TNL::swap( auxPtr, helpFunc ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) ); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -489,8 +489,8 @@ solve( const MeshPointer& mesh, template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) __global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; Loading @@ -515,8 +515,8 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index } template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) __global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks ) { int i = threadIdx.x; int blId = blockIdx.x; Loading Loading @@ -588,7 +588,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int i = threadIdx.x + blockDim.x*blockIdx.x; Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +7 −7 Original line number Diff line number Diff line Loading @@ -383,20 +383,20 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), BlockIterDevice ); BlockIterDevice.getView() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ ); GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY * numBlocksZ ) ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); Loading Loading @@ -426,8 +426,8 @@ solve( const MeshPointer& mesh, #ifdef HAVE_CUDA template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, __global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY, int numBlockZ ) { int i = blockIdx.x * 1024 + threadIdx.x; Loading Loading @@ -462,7 +462,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; Loading