From 852534a88eab1e78b21d3fd41665dc7ca556c878 Mon Sep 17 00:00:00 2001 From: Fencl <fenclmat@fjfi.cvut.cz> Date: Fri, 16 Nov 2018 12:03:40 +0100 Subject: [PATCH] 3D FSM+FIM implemented 2D FSM+FIM method pickes size of rectangular block depending on number of blocks --- .../tnlDirectEikonalMethodsBase.h | 214 ++++---- .../tnlDirectEikonalMethodsBase_impl.h | 519 +++++++++++++++--- .../hamilton-jacobi/tnlFastSweepingMethod.h | 222 ++++---- .../tnlFastSweepingMethod2D_impl.h | 74 ++- .../tnlFastSweepingMethod3D_impl.h | 455 +++++++++------ 5 files changed, 1004 insertions(+), 480 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 7d990c1bb1..f712ce2cc9 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -19,102 +19,112 @@ class tnlDirectEikonalMethodsBase }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - __cuda_callable__ bool updateCell( volatile Real sArray[18], - int thri, const Real h, - const Real velocity = 1.0 ); + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + __cuda_callable__ bool updateCell( volatile Real sArray[18], + int thri, const Real h, + const Real velocity = 1.0 ); }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - public: - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; - typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - template< int sizeSArray > - __cuda_callable__ bool updateCell( volatile Real *sArray, - int thri, int thrj, const Real hx, const Real hy, - const Real velocity = 1.0 ); - - template< int sizeSArray > - void updateBlocks( InterfaceMapType interfaceMap, - MeshFunctionType aux, - MeshFunctionType helpFunc, - ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); - - void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); + public: + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell( volatile Real *sArray, + int thri, int thrj, const Real hx, const Real hy, + const Real velocity = 1.0 ); + + template< int sizeSArray > + void updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - public: - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0); - - __cuda_callable__ bool updateCell( volatile Real sArray[10][10][10], - int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, - const Real velocity = 1.0 ); + public: + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0); + + template< int sizeSArray > + void updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell3D( volatile Real *sArray, + int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, + const Real velocity = 1.0 ); }; template < typename T1, typename T2 > @@ -126,46 +136,46 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, - bool *BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, + bool *BlockIterDevice ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 5083544e2b..8f7937541d 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -148,6 +148,7 @@ updateBlocks( InterfaceMapType interfaceMap, } + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) @@ -263,6 +264,370 @@ updateBlocks( InterfaceMapType interfaceMap, } } } +template< typename Real, + typename Device, + typename Index > +template< int sizeSArray > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) +{ +//#pragma omp parallel for schedule( dynamic ) + for( int i = 0; i < BlockIterHost.getSize(); i++ ) + { + if( BlockIterHost[ i ] ) + { + MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + int dimZ = mesh.getDimensions().z(); + //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; + int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockz = dimZ/numThreadsPerBlock + ((dimZ%numThreadsPerBlock != 0) ? 1:0); + //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; + int xkolik = numThreadsPerBlock + 1; + int ykolik = numThreadsPerBlock + 1; + int zkolik = numThreadsPerBlock + 1; + + + int blIdz = i/( numOfBlockx * numOfBlocky ); + int blIdy = (i-blIdz*numOfBlockx * numOfBlocky )/(numOfBlockx ); + int blIdx = (i-blIdz*numOfBlockx * numOfBlocky )%( numOfBlockx ); + //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*numThreadsPerBlock+1; + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*numThreadsPerBlock+1; + if( numOfBlockz-1 == blIdz ) + zkolik = dimZ - (blIdz)*numThreadsPerBlock+1; + //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; + + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; + changed[ 0 ] = 1;*/ + Real hx = mesh.getSpaceSteps().x(); + Real hy = mesh.getSpaceSteps().y(); + Real hz = mesh.getSpaceSteps().z(); + + bool changed = false; + BlockIterHost[ i ] = 0; + + + Real *sArray; + sArray = new Real[ sizeSArray * sizeSArray * sizeSArray ]; + if( sArray == nullptr ) + std::cout << "Error while allocating memory for sArray." << std::endl; + + for( int k = 0; k < sizeSArray; k++ ) + for( int l = 0; l < sizeSArray; l++ ) + for( int m = 0; m < sizeSArray; m++ ){ + sArray[ m * sizeSArray * sizeSArray + k * sizeSArray + l ] = std::numeric_limits< Real >::max(); + } + + + for( int thrk = 0; thrk < numThreadsPerBlock; thrk++ ) + for( int thrj = 0; thrj < numThreadsPerBlock; thrj++ ) + { + if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX -1 + thrk*dimX*dimY ]; + + if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy *numThreadsPerBlock*dimX+ blIdx*numThreadsPerBlock + numThreadsPerBlock + thrj * dimX + thrk*dimX*dimY ]; + + if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX + thrj + thrk*dimX*dimY ]; + + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + (blIdy+1) * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj + thrk*dimX*dimY ]; + + if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX * dimY + thrj * dimX + thrk ]; + + if( dimZ > (blIdz+1) * numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = + aux[ (blIdz+1)*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX + thrk ]; + } + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + sArray[(m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1] = + aux[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ]; + } + } + } + /*string s; + int numWhile = 0; + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + //printf("In with point m = %d, k = %d, l = %d\n", m, k, l); + changed = this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz) || changed; + + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l <numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s ); + */ + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l <numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l <numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + if( changed ){ + BlockIterHost[ i ] = 1; + } + + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) { + for( int m = 0; m < numThreadsPerBlock; m++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + helpFunc[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] = + sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + //std::cout << helpFunc[ m*dimX*dimY + k*dimX + l ] << " "; + } + } + //std::cout << std::endl; + } + //std::cout << std::endl; + } + //helpFunc.save( "helpF.tnl"); + delete []sArray; + } + } +} +template< typename Real, + typename Device, + typename Index > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) +{ + int* BlockIterPom; + BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ]; + + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterPom[ i ] = 0; + + int m=0, l=0, k=0; + l = i/( numBlockX * numBlockY ); + k = (i-l*numBlockX * numBlockY )/(numBlockX ); + m = (i-l*numBlockX * numBlockY )%( numBlockX ); + + if( m > 0 && BlockIterHost[ i - 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( l > 0 && BlockIterHost[ i - numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + }else if( l < numBlockZ-1 && BlockIterHost[ i + numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + } + } + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterHost[ i ] = BlockIterPom[ i ]; + } +} + template< typename Real, typename Device, @@ -619,8 +984,8 @@ initInterface( const MeshFunctionPointer& _input, { cell.refresh(); output[ cell.getIndex() ] = - input( cell ) > 0 ? std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); + input( cell ) > 0 ? 10://std::numeric_limits< RealType >::max() : + -10;//- std::numeric_limits< RealType >::max(); interfaceMap[ cell.getIndex() ] = false; } @@ -967,6 +1332,82 @@ updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real return false; } +template< typename Real, + typename Device, + typename Index > +template< int sizeSArray > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateCell3D( volatile Real *sArray, int thri, int thrj, int thrk, + const Real hx, const Real hy, const Real hz, const Real v ) +{ + const RealType value = sArray[thrk *sizeSArray * sizeSArray + thrj * sizeSArray + thri]; + + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + c = TNL::argAbsMin( sArray[ (thrk+1)* sizeSArray*sizeSArray + thrj * sizeSArray + thri ], + sArray[ (thrk-1) * sizeSArray *sizeSArray + thrj* sizeSArray + thri ] ); + + b = TNL::argAbsMin( sArray[ thrk* sizeSArray*sizeSArray + (thrj+1) * sizeSArray + thri ], + sArray[ thrk* sizeSArray * sizeSArray + (thrj-1)* sizeSArray +thri ] ); + + a = TNL::argAbsMin( sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri+1 ], + sArray[ thrk* sizeSArray * sizeSArray + thrj* sizeSArray +thri-1 ] ); + + /*if( thrk == 8 ) + printf("Calculating a = %f, b = %f, c = %f\n" , a, b, c );*/ + + if( fabs( a ) == 10&& //std::numeric_limits< RealType >::max() && + fabs( b ) == 10&&//std::numeric_limits< RealType >::max() && + fabs( c ) == 10)//std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + + sortMinims( pom ); + + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + } + + return false; +} #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -1215,78 +1656,4 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) else return false; } - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool -tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: -updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, - const Real hx, const Real hy, const Real hz, const Real v ) -{ - const RealType value = sArray[thrk][thrj][thri]; - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], - sArray[ thrk-1 ][ thrj ][ thri ] ); - - b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], - sArray[ thrk ][ thrj-1 ][ thri ] ); - - a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], - sArray[ thrk ][ thrj ][ thri-1 ] ); - - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - - sortMinims( pom ); - - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - } - - return false; -} #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index 60c690e062..57b1886e8b 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -1,9 +1,9 @@ /*************************************************************************** - FastSweepingMethod.h - description - ------------------- - begin : Jul 14, 2016 - copyright : (C) 2017 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz + FastSweepingMethod.h - description + ------------------- + begin : Jul 14, 2016 + copyright : (C) 2017 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ @@ -17,132 +17,134 @@ template< typename Mesh, - typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > + typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > class FastSweepingMethod { }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); - - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); + + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); - - public: - - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - using typename BaseType::ArrayContainer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); + + public: + + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); - - public: - - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); + + public: + + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index b823fec037..07be36571f 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -15,9 +15,12 @@ #include "tnlFastSweepingMethod.h" #include <TNL/Devices/Cuda.h> -#include <string.h> +#include <TNL/Communicators/MpiDefs.h> + + +#include <string.h> #include <iostream> #include <fstream> @@ -80,16 +83,48 @@ solve( const MeshPointer& mesh, MeshFunctionType aux = *auxPtr; +//#ifdef HAVE_MPI + bool a = Communicators::MpiCommunicator::IsInitialized(); + if( a ) + printf("Je Init\n"); + else + printf("Neni Init\n"); +//#endif while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 16; + int numThreadsPerBlock = -1; + + numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0)); + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + if( numThreadsPerBlock <= 16 ) + numThreadsPerBlock = 16; + else if(numThreadsPerBlock <= 32 ) + numThreadsPerBlock = 32; + else if(numThreadsPerBlock <= 64 ) + numThreadsPerBlock = 64; + else if(numThreadsPerBlock <= 128 ) + numThreadsPerBlock = 128; + else if(numThreadsPerBlock <= 256 ) + numThreadsPerBlock = 256; + else if(numThreadsPerBlock <= 512 ) + numThreadsPerBlock = 512; + else + numThreadsPerBlock = 1024; + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + + if( numThreadsPerBlock == -1 ){ + printf("Fail in setting numThreadsPerBlock.\n"); + break; + } + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; /*Real **sArray = new Real*[numBlocksX*numBlocksY]; @@ -115,13 +150,29 @@ solve( const MeshPointer& mesh, } std::cout<<std::endl;*/ unsigned int numWhile = 0; - while( IsCalculationDone && numWhile < 1 ) + while( IsCalculationDone ) { IsCalculationDone = 0; helpFunc1 = auxPtr; auxPtr = helpFunc; helpFunc = helpFunc1; - this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + switch ( numThreadsPerBlock ){ + case 16: + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 32: + this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 64: + this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 128: + this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 256: + this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 512: + this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + default: + this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + } + //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ @@ -131,14 +182,14 @@ solve( const MeshPointer& mesh, } } numWhile++; - std::cout <<"numWhile = "<< numWhile <<std::endl; + /*std::cout <<"numWhile = "<< numWhile <<std::endl; for( int j = numBlocksY-1; j>-1; j-- ){ for( int i = 0; i < numBlocksX; i++ ) std::cout << BlockIterHost[ j * numBlocksX + i ]; std::cout << std::endl; } - std::cout << std::endl; + std::cout << std::endl;*/ this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY ); @@ -150,8 +201,8 @@ solve( const MeshPointer& mesh, std::cout << std::endl;*/ //std::cout<<std::endl; - string s( "aux-"+ std::to_string(numWhile) + ".tnl"); - aux.save( s ); + //string s( "aux-"+ std::to_string(numWhile) + ".tnl"); + //aux.save( s ); } if( numWhile == 1 ){ auxPtr = helpFunc; @@ -266,8 +317,8 @@ solve( const MeshPointer& mesh, BlockIterPom.setSize( numBlocksX * numBlocksY ); BlockIterPom.setValue( 0 ); /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); @@ -408,6 +459,7 @@ solve( const MeshPointer& mesh, } iteration++; } + //#endif aux.save("aux-final.tnl"); } @@ -527,7 +579,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /** FOR FIM METHOD */ - + if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] ) { __syncthreads(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 65aba5bf58..5af33cf296 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -64,9 +64,6 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); -#ifdef HAVE_CUDA - cudaDeviceSynchronize(); -#endif auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); @@ -78,170 +75,259 @@ solve( const MeshPointer& mesh, { if( std::is_same< DeviceType, Devices::Host >::value ) { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); + int numThreadsPerBlock = 64; - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksZ = mesh->getDimensions().z() / numThreadsPerBlock + (mesh->getDimensions().z() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); + /*Real **sArray = new Real*[numBlocksX*numBlocksY]; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + sArray[ i ] = new Real [ (numThreadsPerBlock + 2)*(numThreadsPerBlock + 2)];*/ - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); + ArrayContainer BlockIterHost; + BlockIterHost.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterHost.setValue( 1 ); + int IsCalculationDone = 1; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + MeshFunctionPointer helpFunc( mesh ); + MeshFunctionPointer helpFunc1( mesh ); + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + //std::cout<< "Size = " << BlockIterHost.getSize() << std::endl; + /*for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<<std::endl; + } + std::cout<<std::endl;*/ + unsigned int numWhile = 0; + while( IsCalculationDone ) + { + IsCalculationDone = 0; + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + + //Reduction + for( int i = 0; i < BlockIterHost.getSize(); i++ ){ + if( IsCalculationDone == 0 ){ + IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; + //break; } } - } - //aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + numWhile++; + std::cout <<"numWhile = "<< numWhile <<std::endl; + /*for( int k = 0; k < numBlocksZ; k++ ){ + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; } + std::cout << std::endl; } + std::cout << std::endl;*/ + + this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY, numBlocksZ ); + + /*for( int k = 0; k < numBlocksZ; k++ ){ + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; + } + std::cout << std::endl; + }*/ + + /*for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << "BlockIterHost = "<< j*numBlocksX + i<< " ," << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + + //std::cout<<std::endl; + //string s( "aux-"+ std::to_string(numWhile) + ".tnl"); + //aux.save( s ); + } + if( numWhile == 1 ){ + auxPtr = helpFunc; } + aux = *auxPtr; + + /*for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + }*/ } if( std::is_same< DeviceType, Devices::Cuda >::value ) { @@ -389,7 +475,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { __syncthreads(); - __shared__ volatile bool changed[ (sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)]; + __shared__ volatile bool changed[ 8*8*8/*(sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)*/]; changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 && thrk == 0 ) @@ -402,6 +488,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 1 && thri == 1 && thrk == 1 ) { + //printf( "We are in the calculation. Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); hz = mesh.getSpaceSteps().z(); @@ -410,8 +497,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< dimZ = mesh.getDimensions().z(); BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0; } - __shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray]; - sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max(); + __shared__ volatile Real sArray[ 10*10*10/*sizeSArray * sizeSArray * sizeSArray*/ ]; + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = std::numeric_limits< Real >::max(); //filling sArray edges int numOfBlockx; @@ -426,6 +513,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< numOfBlockx = gridDim.x; numOfBlocky = gridDim.y; numOfBlockz = gridDim.z; + __syncthreads(); if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*blockDim.x+1; @@ -438,54 +526,55 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thri == 0 ) { if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + 0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 3 ) { if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 4 ) { if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; else - sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[0 * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( thri == 5 ) { if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; else - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( i < dimX && j < dimY && k < dimZ ) { - sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; + sArray[(thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; } __syncthreads(); + while( changed[ 0 ] ) { __syncthreads(); @@ -493,11 +582,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< changed[ currentIndex ] = false; //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) + if( i < dimX && j < dimY && k < dimZ ) { - if( ! interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) + if( ! interfaceMap[ k*dimX*dimY + j * dimX + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); + changed[ currentIndex ] = ptr.updateCell3D< sizeSArray >( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); } } __syncthreads(); @@ -535,7 +624,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } } __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + if( currentIndex < 32 ) { if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; @@ -548,7 +637,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) { - for(int m = 0; m < 8; m++){ + //for(int m = 0; m < 8; m++){ + int m = 4; for(int n = 0; n<8; n++){ for(int b=0; b<8; b++) printf(" %i ", changed[m*64 + n*8 + b]); @@ -556,16 +646,19 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } printf("\n \n"); } - }*/ + //}*/ + if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) { - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + //printf( "Setting block calculation. Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); + BlockIterDevice[ blIdz * gridDim.x * gridDim.y + blIdy * gridDim.x + blIdx ] = 1; } __syncthreads(); } if( i < dimX && j < dimY && k < dimZ ) - helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; + helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thri+1 ]; + } } #endif -- GitLab