From 5d1fab6e4fa455940aa0edf159e1ca3229738a36 Mon Sep 17 00:00:00 2001 From: fencl <fenclmat@fjfi.cvut.cz> Date: Mon, 23 Apr 2018 11:01:29 +0200 Subject: [PATCH] CUDA implemented for exact number of loops in 2D and 3D --- .../tnlDirectEikonalMethodsBase.h | 23 +- .../tnlDirectEikonalMethodsBase_impl.h | 456 +++++++++++------- .../hamilton-jacobi/tnlFastSweepingMethod.h | 2 + .../tnlFastSweepingMethod2D_impl.h | 49 +- .../tnlFastSweepingMethod3D_impl.h | 380 ++++++++------- 5 files changed, 542 insertions(+), 368 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index d8770b0e73..138671a3b7 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -67,9 +67,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > template< typename MeshEntity > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, - const RealType velocity = 1.0 ); - protected: - + const RealType velocity = 1.0 ); }; template< typename Real, @@ -78,7 +76,6 @@ template< typename Real, class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { public: - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; typedef Real RealType; typedef Device DevcieType; @@ -96,11 +93,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, const RealType velocity = 1.0); - - /*Real sort( Real a, Real b, Real c, - const RealType& ha, - const RealType& hb, - const RealType& hc ); */ }; template < typename T1, typename T2 > @@ -112,7 +104,8 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > -__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); template < typename Real, typename Device, typename Index > @@ -120,7 +113,15 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); -//__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ); +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); + +template < typename Real, typename Device, typename Index > +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 37fd245128..bf78a02a48 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -11,6 +11,7 @@ #include <iostream> #include "tnlFastSweepingMethod.h" + template< typename Real, typename Device, typename Index > @@ -81,13 +82,7 @@ initInterface( const MeshFunctionPointer& _input, MeshFunctionPointer& _output, InterfaceMapPointer& _interfaceMap ) { - /* - doplnit přepočty pro cudu: - * overit is_same device - * na kazdy bod jedno cuda vlakno - */ - - + if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA @@ -102,7 +97,6 @@ initInterface( const MeshFunctionPointer& _input, CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >() ); - //CudaInitCaller<<< gridSize, blockSize >>>( input ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; #endif @@ -215,6 +209,7 @@ template< typename Real, typename Device, typename Index > template< typename MeshEntity > +__cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( MeshFunctionType& u, @@ -301,162 +296,187 @@ initInterface( const MeshFunctionPointer& _input, MeshFunctionPointer& _output, InterfaceMapPointer& _interfaceMap ) { - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - const MeshType& mesh = input.getMesh(); - typedef typename MeshType::Cell Cell; - Cell cell( mesh ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) > 0 ? TypeInfo< RealType >::getMaxValue() : - - TypeInfo< RealType >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false; - } - - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType& hz = mesh.getSpaceSteps().z(); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - const RealType& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); - //Try exact initiation - /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); - if( c * input[ e ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ e ] = input[ e ]; - interfaceMap[ e ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ n ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ n ] = input[ n ]; - interfaceMap[ n ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ t ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ t ] = input[ t ]; - interfaceMap[ t ] = true; - interfaceMap[ cell.getIndex() ] = true; - }*/ - if( c * input[ n ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - if ( output[ n ] < pom - hy) - output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; - - }else - { - pom = - ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - if( output[ n ] > hy + pom ) - output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hx * c )/( c - input[ e ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ e ] < pom ) - output[ e ] = pom; - - }else - { - pom = - (hx * c)/( c - input[ e ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ e ] > pom ) - output[ e ] = pom; - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - if( c * input[ t ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hz * c )/( c - input[ t ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ t ] < pom ) - output[ t ] = pom; - - }else - { - pom = - (hz * c)/( c - input[ t ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ t ] > pom ) - output[ t ] = pom; - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ t ] = true; - } - } - /*output[ cell.getIndex() ] = - c > 0 ? TypeInfo< RealType >::getMaxValue() : - -TypeInfo< RealType >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 - } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; +#endif + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + const MeshType& mesh = input.getMesh(); + typedef typename MeshType::Cell Cell; + Cell cell( mesh ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) > 0 ? TypeInfo< RealType >::getMaxValue() : + - TypeInfo< RealType >::getMaxValue(); + interfaceMap[ cell.getIndex() ] = false; + } + + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType& hz = mesh.getSpaceSteps().z(); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); + //Try exact initiation + /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); + const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); + if( c * input[ e ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ e ] = input[ e ]; + interfaceMap[ e ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ n ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ n ] = input[ n ]; + interfaceMap[ n ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ t ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ t ] = input[ t ]; + interfaceMap[ t ] = true; + interfaceMap[ cell.getIndex() ] = true; + }*/ + if( c * input[ n ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + if ( output[ n ] < pom - hy) + output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; + + }else + { + pom = - ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + if( output[ n ] > hy + pom ) + output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); + + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; + } + if( c * input[ e ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hx * c )/( c - input[ e ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ e ] < pom ) + output[ e ] = pom; + + }else + { + pom = - (hx * c)/( c - input[ e ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ e ] > pom ) + output[ e ] = pom; + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + if( c * input[ t ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hz * c )/( c - input[ t ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ t ] < pom ) + output[ t ] = pom; + + }else + { + pom = - (hz * c)/( c - input[ t ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ t ] > pom ) + output[ t ] = pom; + + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ t ] = true; + } + } + /*output[ cell.getIndex() ] = + c > 0 ? TypeInfo< RealType >::getMaxValue() : + -TypeInfo< RealType >::getMaxValue(); + interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 + } + } } template< typename Real, typename Device, typename Index > template< typename MeshEntity > +__cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateCell( MeshFunctionType& u, @@ -628,12 +648,13 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); + const Index cind = cell.getIndex(); - output[ cell.getIndex() ] = + output[ cind ] = input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - TypeInfo< Real >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false; + interfaceMap[ cind ] = false; const Real& hx = mesh.getSpaceSteps().x(); const Real& hy = mesh.getSpaceSteps().y(); @@ -647,48 +668,131 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, const Index w = neighbors.template getEntityIndex< -1, 0 >(); const Index n = neighbors.template getEntityIndex< 0, 1 >(); const Index s = neighbors.template getEntityIndex< 0, -1 >(); - + if( c * input[ n ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ e ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; - interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ cind ] = true; } if( c * input[ w ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; - interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ cind ] = true; } if( c * input[ s ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; - interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ cind ] = true; } } } } - -/*__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ) +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; - //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh(); + int k = blockDim.z*blockIdx.z + threadIdx.z; + const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); -}*/ + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + { + typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : + - TypeInfo< Real >::getMaxValue(); + interfaceMap[ cind ] = false; + cell.refresh(); + + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + const Real& hz = mesh.getSpaceSteps().z(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const Index s = neighbors.template getEntityIndex< 0, -1, 0 >(); + const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); + const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ b ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ t ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + } + } +} diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index 48f3301320..e44aaca871 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -15,6 +15,7 @@ #include <TNL/SharedPointer.h> #include "tnlDirectEikonalMethodsBase.h" + template< typename Mesh, typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > class FastSweepingMethod @@ -145,6 +146,7 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy > const IndexType maxIterations; }; + #include "tnlFastSweepingMethod1D_impl.h" #include "tnlFastSweepingMethod2D_impl.h" #include "tnlFastSweepingMethod3D_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index e36224d075..73e1708958 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -14,6 +14,9 @@ #pragma once #include "tnlFastSweepingMethod.h" +#include <TNL/TypeInfo.h> +#include <TNL/Devices/Cuda.h> + template< typename Real, typename Device, @@ -212,15 +215,23 @@ solve( const MeshPointer& mesh, { // TODO: CUDA code #ifdef HAVE_CUDA - /*int numBlocks = 2; - int threadsPerBlock; - if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) - threadsPerBlock = (int)( mesh->getDimensions().x() ); - else - threadsPerBlock = (int)( mesh->getDimensions().y() ); + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY ); + Devices::Cuda::synchronizeDevice(); + int DIM = mesh->getDimensions().x(); - CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); - cudaDeviceSynchronize(); //copak dela?*/ + tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; + for( int k = 0; k < numBlocksX; k++) + CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; #endif } iteration++; @@ -228,30 +239,30 @@ solve( const MeshPointer& mesh, aux.save("aux-final.tnl"); } +//#ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > -__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = threadIdx.y + blockDim.y*blockIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.getMesh(); + int j = blockDim.y*blockIdx.y + threadIdx.y; + const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { - //make cell of aux from index typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); - - //update cell value few times - //for( int i = 0; i < mesh.getDimensions() ; i++ ) - //{ - cell.refresh(); + //tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; + for( int k = 0; k < 16; k++ ) + { if( ! interfaceMap( cell ) ) { - // tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::updateCell( aux, cell ); + ptr.updateCell( aux, cell ); } - //} + } } } +//#endif \ No newline at end of file diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 1fd8c9f2a0..3e8cb7bb19 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -21,7 +21,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 1 ) +: maxIterations( 2 ) { } @@ -64,6 +64,7 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + cudaDeviceSynchronize(); auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); @@ -71,172 +72,201 @@ solve( const MeshPointer& mesh, IndexType iteration( 0 ); MeshFunctionType aux = *auxPtr; InterfaceMapType interfaceMap = * interfaceMapPtr; - while( iteration < this->maxIterations ) - { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) + { + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-1.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-7.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } + // TODO: CUDA code +#ifdef HAVE_CUDA + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + Devices::Cuda::synchronizeDevice(); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; + for( int k = 0; k < numBlocksX; k++) + CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif } + //aux.save( "aux-8.tnl" ); iteration++; @@ -244,3 +274,29 @@ solve( const MeshPointer& mesh, aux.save("aux-final.tnl"); } +template < typename Real, typename Device, typename Index > +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux ) +{ + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + const Meshes::Grid< 3, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + { + typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; + cell.refresh(); + //tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; + for( int l = 0; l < 8; l++ ) + { + if( ! interfaceMap( cell ) ) + { + ptr.updateCell( aux, cell ); + } + } + } +} \ No newline at end of file -- GitLab