Commit 84af6166 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Fixed array binding in the Hamilton-Jacobi solver - using ArrayView instead of Array

parent 386e9647
Loading
Loading
Loading
Loading
+17 −17
Original line number Diff line number Diff line
@@ -80,12 +80,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
            const Real velocity = 1.0 );
    
    template< int sizeSArray >
    void updateBlocks( InterfaceMapType interfaceMap,
            MeshFunctionType aux,
            MeshFunctionType helpFunc,
            ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ );
    void updateBlocks( const InterfaceMapType& interfaceMap,
            MeshFunctionType& aux,
            MeshFunctionType& helpFunc,
            ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ );
    
    void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY  );
    void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY  );
};

template< typename Real,
@@ -114,12 +114,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
            const RealType velocity = 1.0);
    
    template< int sizeSArray >
    void updateBlocks( const InterfaceMapType interfaceMap,
            const MeshFunctionType aux,
    void updateBlocks( const InterfaceMapType& interfaceMap,
            const MeshFunctionType& aux,
            MeshFunctionType& helpFunc,
            ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ );
            ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ );
    
    void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ );
    void getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ );
    
    template< int sizeSArray >
    __cuda_callable__ bool updateCell3D( volatile Real *sArray,
@@ -147,15 +147,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
        const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
        const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0);
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0);

template < typename Index >
__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks );
__global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks );

template < typename Index >
__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY );
__global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY );

template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, 
@@ -172,11 +172,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
        const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
        const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice );
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice );

template < typename Index >
__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,
__global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom,
        int numBlockX, int numBlockY, int numBlockZ );
#endif

+9 −9
Original line number Diff line number Diff line
@@ -96,10 +96,10 @@ template< typename Real,
template< int sizeSArray >
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
updateBlocks( InterfaceMapType interfaceMap,
        MeshFunctionType aux,
        MeshFunctionType helpFunc,
        ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
updateBlocks( const InterfaceMapType& interfaceMap,
        MeshFunctionType& aux,
        MeshFunctionType& helpFunc,
        ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
{
#pragma omp parallel for schedule( dynamic )
  for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
@@ -270,10 +270,10 @@ template< typename Real,
template< int sizeSArray >
void
tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
updateBlocks( const InterfaceMapType interfaceMap,
        const MeshFunctionType aux,
updateBlocks( const InterfaceMapType& interfaceMap,
        const MeshFunctionType& aux,
        MeshFunctionType& helpFunc,
        ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
        ArrayContainer& BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
{  
//#pragma omp parallel for schedule( dynamic )
  for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
@@ -594,7 +594,7 @@ template< typename Real,
        typename Index >
void 
tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >::
getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ )
getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY, int numBlockZ )
{
  int* BlockIterPom; 
  BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ];
@@ -634,7 +634,7 @@ template< typename Real,
        typename Index >
void 
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::
getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY )
getNeighbours( ArrayContainer& BlockIterHost, int numBlockX, int numBlockY )
{
  int* BlockIterPom; 
  BlockIterPom = new int [numBlockX * numBlockY];
+13 −13
Original line number Diff line number Diff line
@@ -378,7 +378,7 @@ solve( const MeshPointer& mesh,
         auxPtr.template getData< Device>(),
         helpFunc.template modifyData< Device>(),
         BlockIterDevice,
         oddEvenBlock );
         oddEvenBlock.getView() );
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
         auxPtr = helpFunc;
@@ -390,17 +390,17 @@ solve( const MeshPointer& mesh,
         auxPtr.template getData< Device>(),
         helpFunc.template modifyData< Device>(),
         BlockIterDevice,
         oddEvenBlock );
         oddEvenBlock.getView() );
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
         auxPtr = helpFunc;
         
         oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
         
         CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
         CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) );
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
         CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
         CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
         cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
         
@@ -422,7 +422,7 @@ solve( const MeshPointer& mesh,
                interfaceMapPtr.template getData< Device >(),
                auxPtr.template modifyData< Device>(),
                helpFunc.template modifyData< Device>(),
                BlockIterDevice );
                BlockIterDevice.getView() );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        
@@ -437,7 +437,7 @@ solve( const MeshPointer& mesh,
        //}
        //std::cout << std::endl;
        
        GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY );
        GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        BlockIterDevice = BlockIterPom;
@@ -447,10 +447,10 @@ solve( const MeshPointer& mesh,
        //TNL::swap( auxPtr, helpFunc );
        
        
        CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) );
        CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY ) );
        TNL_CHECK_CUDA_DEVICE;
        
        CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
        CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
        TNL_CHECK_CUDA_DEVICE;
        
        
@@ -489,8 +489,8 @@ solve( const MeshPointer& mesh,


template < typename Index >
__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY )
__global__ void GetNeighbours( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY )
{
  int i = blockIdx.x * 1024 + threadIdx.x;
  
@@ -515,8 +515,8 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index
}

template < typename Index >
__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks )
__global__ void CudaParallelReduc( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > dBlock, int nBlocks )
{
  int i = threadIdx.x;
  int blId = blockIdx.x;
@@ -588,7 +588,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
        const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
        const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock )
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock )
{
  int thri = threadIdx.x; int thrj = threadIdx.y;
  int i = threadIdx.x + blockDim.x*blockIdx.x;
+7 −7
Original line number Diff line number Diff line
@@ -383,20 +383,20 @@ solve( const MeshPointer& mesh,
                interfaceMapPtr.template getData< Device >(),
                auxPtr.template getData< Device>(),
                helpFunc.template modifyData< Device>(),
                BlockIterDevice );
                BlockIterDevice.getView() );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        
        GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ );
        GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        BlockIterDevice = BlockIterPom;
        
        CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) );
        CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice.getView(), dBlock.getView(), ( numBlocksX * numBlocksY * numBlocksZ ) );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        
        CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
        CudaParallelReduc<<< 1, nBlocks >>>( dBlock.getView(), dBlock.getView(), nBlocks );
        cudaDeviceSynchronize();
        TNL_CHECK_CUDA_DEVICE;
        cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost);
@@ -426,8 +426,8 @@ solve( const MeshPointer& mesh,

#ifdef HAVE_CUDA
template < typename Index >
__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,
__global__ void GetNeighbours3D( TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice,
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterPom,
        int numBlockX, int numBlockY, int numBlockZ )
{
  int i = blockIdx.x * 1024 + threadIdx.x;
@@ -462,7 +462,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid<
        const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
        const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux,
        Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc,
        TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice )
        TNL::Containers::ArrayView< int, Devices::Cuda, Index > BlockIterDevice )
{
  int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z;
  int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z;