diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 979c70e7b5d48ec7b8bedcbbcab562dc72b72857..fe7d4b9ba8171c2beb52486b1e177fb087abc850 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > template< typename MeshEntity > __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell ); + const MeshEntity& cell, + const RealType velocity = 1.0 ); + __cuda_callable__ bool updateCell( volatile Real sArray[18], + int thri, const Real h, + const Real velocity = 1.0 ); }; @@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); - - __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], - int dimX, int dimY, int i, int j ); - - /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], - int dimX, int dimY, int i, int j );*/ }; template< typename Real, @@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); + +template < typename Real, typename Device, typename Index > +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, + bool *BlockIterDevice ); + template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 9ec95c25f686a8cda33ae034f7084110d27562cc..c742a11b501c818beccf88b94c1671b133c4b8cb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -21,45 +21,72 @@ initInterface( const MeshFunctionPointer& _input, MeshFunctionPointer& _output, InterfaceMapPointer& _interfaceMap ) { - const MeshType& mesh = _input->getMesh(); - typedef typename MeshType::Cell Cell; - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - Cell cell( mesh ); - for( cell.getCoordinates().x() = 1; - cell.getCoordinates().x() < mesh.getDimensions().x() - 1; - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - const RealType& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - const auto& neighbors = cell.getNeighborEntities(); - const RealType& h = mesh.getSpaceSteps().x(); - //const IndexType& c = cell.getIndex(); - const IndexType e = neighbors.template getEntityIndex< 1 >(); - const IndexType w = neighbors.template getEntityIndex< -1 >(); - if( c * input[ e ] <=0 ) - { - output[ cell.getIndex() ] = - c >= 0 ? ( h * c )/( c - input[ e ] ) : - - ( h * c )/( c - input[ e ] ); - interfaceMap[ cell.getIndex() ] = true; - } - if( c * input[ w ] <=0 ) - { - output[ cell.getIndex() ] = - c >= 0 ? ( h * c )/( c - input[ w ] ) : - - ( h * c )/( c - input[ w ] ); - interfaceMap[ cell.getIndex() ] = true; - } - } - output[ cell.getIndex() ] = - c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() : - -TypeInfo< RealType >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false; - } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize ); + dim3 gridSize( numBlocksX ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; +#endif + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshType& mesh = _input->getMesh(); + typedef typename MeshType::Cell Cell; + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + Cell cell( mesh ); + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() : + - TypeInfo< RealType >::getMaxValue(); + interfaceMap[ cell.getIndex() ] = false; + } + + + const RealType& h = mesh.getSpaceSteps().x(); + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x() - 1; + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + const auto& neighbors = cell.getNeighborEntities(); + Real pom = 0; + //const IndexType& c = cell.getIndex(); + const IndexType e = neighbors.template getEntityIndex< 1 >(); + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + + pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) + output[ e ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + } + } + } } template< typename Real, @@ -69,8 +96,31 @@ template< typename Real, void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell ) + const MeshEntity& cell, + const RealType v ) { + const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); + const MeshType& mesh = cell.getMesh(); + const RealType& h = mesh.getSpaceSteps().x(); + const RealType value = u( cell ); + RealType a, tmp = TypeInfo< RealType >::getMaxValue(); + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ neighborEntities.template getEntityIndex< -1 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], + u[ neighborEntities.template getEntityIndex< 1 >() ] ); + } + + if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) + return; + + tmp = a + TNL::sign( value ) * h/v; + + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } template< typename Real, @@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input, } if( std::is_same< Device, Devices::Host >::value ) { - const MeshFunctionType& input = _input.getData(); + MeshFunctionType input = _input.getData(); + + double A[320][320]; + std::ifstream fileInit("/home/maty/Downloads/initData.txt"); + + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + fileInit >> A[i][j]; + fileInit.close(); + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + input[i*320 + j] = A[i][j]; + + MeshFunctionType& output = _output.modifyData(); InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); const MeshType& mesh = input.getMesh(); @@ -186,7 +249,7 @@ initInterface( const MeshFunctionPointer& _input, pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; + output[ e ] = pom; /*}else { pom = - (hx * c)/( c - input[ e ]); @@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] ) } } -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ void -tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: -setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) -{ - int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); - int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); - int xkolik = 18; - int ykolik = 18; - int xOd = 0; - int yOd = 0; - - if( blockIdx == 0 ) - xOd = 1; - if( blockIdy == 0 ) - yOd = 1; - - if( numOfBlockx - 1 == blockIdx ) - xkolik = dimX - (numOfBlockx-1)*16+1; - if( numOfBlocky -1 == blockIdy ) - ykolik = dimY - (numOfBlocky-1)*16+1; + +#ifdef HAVE_CUDA +template < typename Real, typename Device, typename Index > +__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) +{ + int i = threadIdx.x + blockDim.x*blockIdx.x; + const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); - if( dimX > (blockIdx+1) * 16 ) - { - for( int i = yOd; i < ykolik; i++ ) - { - sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; - } - } - if( blockIdx != 0 ) - { - for( int i = yOd; i < ykolik; i++ ) - { - sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; - } - } - if( dimY > (blockIdy+1) * 16 ) + if( i < mesh.getDimensions().x() ) { - for( int i = xOd; i < xkolik; i++ ) + typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : + - TypeInfo< Real >::getMaxValue(); + interfaceMap[ cind ] = false; + + const Real& h = mesh.getSpaceSteps().x(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1 >(); + const Index w = neighbors.template getEntityIndex< -1 >(); + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } } - if( blockIdy != 0 ) - { - for( int i = xOd; i < xkolik; i++ ) - { - sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; - } - } + } - - - -#ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, @@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001 ) + if ( fabs( tmp ) > 0.01*hx ) return true; else return false; @@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01 ) + if ( fabs( tmp ) > 0.01*hx ) return true; else return false; @@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con return false; } + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: +updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) +{ + const RealType value = sArray[ thri ]; + RealType a, tmp = TypeInfo< RealType >::getMaxValue(); + + a = TNL::argAbsMin( sArray[ thri+1 ], + sArray[ thri-1 ] ); + + if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) + return false; + + tmp = a + TNL::sign( value ) * h/v; + + + sArray[ thri ] = argAbsMin( value, tmp ); + + tmp = value - sArray[ thri ]; + if ( fabs( tmp ) > 0.01*h ) + return true; + else + return false; +} #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h index 57ac7d6896f4db6dc91389af45d9d7c6cef43ec2..d1eb8fe2e82caa6b3111da3f8d6ce9be3ca7bf3c 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h @@ -125,7 +125,7 @@ bool tnlDirectEikonalProblem< Mesh, Anisotropy, Real, Index >:: solve( const MeshPointer& mesh, DofVectorPointer& dofs ) -{ +{ FastSweepingMethod< MeshType, AnisotropyType > fsm; fsm.solve( mesh, anisotropy, initialData ); return true; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h index 105f1ceda9b7af99e896e9aadae910330a543004..87974c6847bbc60e60177b33c5884eca3f2ef0ec 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h @@ -57,14 +57,191 @@ FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy >:: solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) -{ - MeshFunctionPointer aux; - InterfaceMapPointer interfaceMap; - aux->setMesh( mesh ); - interfaceMap->setMesh( mesh ); +{ + MeshFunctionPointer auxPtr; + InterfaceMapPointer interfaceMapPtr; + auxPtr->setMesh( mesh ); + interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; - BaseType::initInterface( u, aux, interfaceMap ); - aux->save( "aux-ini.tnl" ); + BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + + auxPtr->save( "aux-ini.tnl" ); + + typename MeshType::Cell cell( *mesh ); + + IndexType iteration( 0 ); + InterfaceMapType interfaceMap = *interfaceMapPtr; + MeshFunctionType aux = *auxPtr; + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + + + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + // TODO: CUDA code +#ifdef HAVE_CUDA + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize ); + dim3 gridSize( numBlocksX ); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr; + + + + bool* BlockIter = (bool*)malloc( ( numBlocksX ) * sizeof( bool ) ); + + bool *BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) ); + + while( BlockIter[ 0 ] ) + { + for( int i = 0; i < numBlocksX; i++ ) + BlockIter[ i ] = false; + cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice); + + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice ); + cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost); + + for( int i = 1; i < numBlocksX; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; + + } + delete[] BlockIter; + cudaFree( BlockIterDevice ); + cudaDeviceSynchronize(); + + TNL_CHECK_CUDA_DEVICE; + + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif + } + iteration++; + } + + aux.save("aux-final.tnl"); +} + +#ifdef HAVE_CUDA +template < typename Real, typename Device, typename Index > +__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, + const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, + bool *BlockIterDevice ) +{ + int thri = threadIdx.x; + int blIdx = blockIdx.x; + int i = thri + blockDim.x*blIdx; + + __shared__ volatile bool changed[16]; + changed[ thri ] = false; + + if( thri == 0 ) + changed[ 0 ] = true; + + const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + __shared__ Real h; + if( thri == 1 ) + { + h = mesh.getSpaceSteps().x(); + } + + __shared__ volatile Real sArray[ 18 ]; + sArray[thri] = TypeInfo< Real >::getMaxValue(); + + //filling sArray edges + int dimX = mesh.getDimensions().x(); + __shared__ volatile int numOfBlockx; + __shared__ int xkolik; + if( thri == 0 ) + { + xkolik = blockDim.x + 1; + numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*blockDim.x+1; + } + __syncthreads(); + + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x ) + sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ]; + else + sArray[xkolik] = TypeInfo< Real >::getMaxValue(); + } + + if( thri == 1 ) + { + if( blIdx != 0 ) + sArray[0] = aux[ blIdx*blockDim.x - 1 ]; + else + sArray[0] = TypeInfo< Real >::getMaxValue(); + } + + + if( i < mesh.getDimensions().x() ) + { + sArray[thri+1] = aux[ i ]; + } + __syncthreads(); + while( changed[ 0 ] ) + { + __syncthreads(); + + changed[ thri ] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() ) + { + if( ! interfaceMap[ i ] ) + { + changed[ thri ] = ptr.updateCell( sArray, thri+1, h ); + } + } + __syncthreads(); + + + + //pyramid reduction + if( thri < 8 ) changed[ thri ] = changed[ thri ] || changed[ thri + 8 ]; + if( thri < 4 ) changed[ thri ] = changed[ thri ] || changed[ thri + 4 ]; + if( thri < 2 ) changed[ thri ] = changed[ thri ] || changed[ thri + 2 ]; + if( thri < 1 ) changed[ thri ] = changed[ thri ] || changed[ thri + 1 ]; + __syncthreads(); + + if( changed[ 0 ] && thri == 0 ) + BlockIterDevice[ blIdx ] = true; + __syncthreads(); + } + + if( i < mesh.getDimensions().x() && (!interfaceMap[ i ]) ) + aux[ i ] = sArray[ thri + 1 ]; } +#endif + diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index e727104086713a7c0e54c55b5a343f06b17c8f42..e5a9c39f30ace9f19f3a5d527550fb8d1db9f6f8 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -18,13 +18,16 @@ #include <TNL/Devices/Cuda.h> +#include <iostream> +#include <fstream> + template< typename Real, typename Device, typename Index, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 1 ) +: maxIterations( 2 ) { } @@ -61,15 +64,31 @@ solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) { + MeshFunctionType v; + v.setMesh(mesh); + double A[320][320]; + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + A[i][j] = 0; + + std::ifstream file("/home/maty/Downloads/mapa2.txt"); + + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + file >> A[i][j]; + file.close(); + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + v[i*320 + j] = A[i][j]; + v.save("mapa.tnl"); + + MeshFunctionPointer auxPtr; InterfaceMapPointer interfaceMapPtr; auxPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); -#ifdef HAVE_CUDA - cudaDeviceSynchronize(); -#endif auxPtr->save( "aux-ini.tnl" ); @@ -92,7 +111,7 @@ solve( const MeshPointer& mesh, { cell.refresh(); if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + this->updateCell( aux, cell, v( cell ) ); } } @@ -109,7 +128,7 @@ solve( const MeshPointer& mesh, //std::cerr << "2 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + this->updateCell( aux, cell, v( cell ) ); } } @@ -126,7 +145,7 @@ solve( const MeshPointer& mesh, //std::cerr << "3 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + this->updateCell( aux, cell, v( cell ) ); } } @@ -143,7 +162,7 @@ solve( const MeshPointer& mesh, //std::cerr << "4 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + this->updateCell( aux, cell, v( cell ) ); } } @@ -222,7 +241,6 @@ solve( const MeshPointer& mesh, int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); - Devices::Cuda::synchronizeDevice(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; @@ -243,20 +261,8 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost); - - /*for( int j = numBlocksY-1; j > -1; j-- ) - { - for( int i = 0; i < numBlocksX; i++ ) - { - std::cout << BlockIter[ j * numBlocksY + i ] << "\t"; - } - std::cout << std::endl; - } - std::cout << std::endl;*/ - + for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; @@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 0 && thri == 0 ) changed[ 0 ] = true; - __syncthreads(); const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; __shared__ Real hy; - if( thrj == 0 && thri == 0 ) + if( thrj == 1 && thri == 1 ) { hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); @@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue(); } - if( thrj == 0 ) + if( thri == 2 ) { if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thri+1 ]; + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; else - sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue(); + sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue(); } - if( thrj == 1 ) + if( thri == 3 ) { if( blIdy != 0 && thri+1 < xkolik ) - sArray[0][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thri+1 ]; + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; else - sArray[0][thri+1] = TypeInfo< Real >::getMaxValue(); + sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue(); } @@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ]; + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true; __syncthreads(); }