diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index a89d230f6497fc0145712d002c38be1a110157f0..e52f2fe1c6a86dfe6483b6156aff424b16819d88 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -632,170 +632,6 @@ __cuda_callable__ void sortMinims( T1 pom[] ) } } -template < typename Real, typename Device, typename Index > -__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - - TypeInfo< Real >::getMaxValue(); - interfaceMap[ cind ] = false; - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - cell.refresh(); - const Real& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1 >(); - const Index s = neighbors.template getEntityIndex< 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cell.getIndex() ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } - } -} - -template < typename Real, typename Device, typename Index > -__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - int k = blockDim.z*blockIdx.z + threadIdx.z; - const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) - { - typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - - TypeInfo< Real >::getMaxValue(); - interfaceMap[ cind ] = false; - cell.refresh(); - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - const Real& hz = mesh.getSpaceSteps().z(); - const Real& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const Index s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); - const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ b ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ t ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } - } -} - template< typename Real, typename Device, typename Index > @@ -967,6 +803,173 @@ getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int b } }*/ + +#ifdef HAVE_CUDA +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) +{ + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : + - TypeInfo< Real >::getMaxValue(); + interfaceMap[ cind ] = false; + + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1 >(); + const Index s = neighbors.template getEntityIndex< 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + } + } +} + +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) +{ + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + { + typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : + - TypeInfo< Real >::getMaxValue(); + interfaceMap[ cind ] = false; + cell.refresh(); + + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + const Real& hz = mesh.getSpaceSteps().z(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const Index s = neighbors.template getEntityIndex< 0, -1, 0 >(); + const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); + const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ b ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ t ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + } + } +} + + template< typename Real, typename Device, typename Index > @@ -1022,4 +1025,5 @@ updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real h //u[ cell.getIndex() ]= argAbsMin( value, tmp ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); } -} \ No newline at end of file +} +#endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 80976772cde6e1d9ffcad92bf8779cea17906e29..1279e3437f29cddaf1ddd1bc1d17b3c05dca5df7 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -67,7 +67,9 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); +#ifdef HAVE_CUDA cudaDeviceSynchronize(); +#endif auxPtr->save( "aux-ini.tnl" ); @@ -240,7 +242,7 @@ solve( const MeshPointer& mesh, aux.save("aux-final.tnl"); } -//#ifdef HAVE_CUDA +#ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, @@ -286,4 +288,4 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } //ptr.getsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); } -//#endif \ No newline at end of file +#endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 5ddd3bdeb039e38ef904a4b574c90bbd4dd89e68..e9fa72e6664b14cf937f1424cc0b0bf17cd0a1bf 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -64,7 +64,9 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); +#ifdef HAVE_CUDA cudaDeviceSynchronize(); +#endif auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); @@ -274,6 +276,7 @@ solve( const MeshPointer& mesh, aux.save("aux-final.tnl"); } +#ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, @@ -299,4 +302,5 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } } } -} \ No newline at end of file +} +#endif