Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +16 −7 Original line number Diff line number Diff line Loading @@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > template< typename MeshEntity > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell ); const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ bool updateCell( volatile Real sArray[18], int thri, const Real h, const Real velocity = 1.0 ); }; Loading Loading @@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j ); /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j );*/ }; template< typename Real, Loading Loading @@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +183 −95 Original line number Diff line number Diff line Loading @@ -20,6 +20,25 @@ tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, MeshFunctionPointer& _output, InterfaceMapPointer& _interfaceMap ) { if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); Devices::Cuda::synchronizeDevice(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { const MeshType& mesh = _input->getMesh(); typedef typename MeshType::Cell Cell; Loading @@ -27,7 +46,20 @@ initInterface( const MeshFunctionPointer& _input, MeshFunctionType& output = _output.modifyData(); InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); Cell cell( mesh ); for( cell.getCoordinates().x() = 1; for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh.getDimensions().x(); cell.getCoordinates().x() ++ ) { cell.refresh(); output[ cell.getIndex() ] = input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() : - TypeInfo< RealType >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; } const RealType& h = mesh.getSpaceSteps().x(); for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh.getDimensions().x() - 1; cell.getCoordinates().x() ++ ) { Loading @@ -36,29 +68,24 @@ initInterface( const MeshFunctionPointer& _input, if( ! cell.isBoundaryEntity() ) { const auto& neighbors = cell.getNeighborEntities(); const RealType& h = mesh.getSpaceSteps().x(); Real pom = 0; //const IndexType& c = cell.getIndex(); const IndexType e = neighbors.template getEntityIndex< 1 >(); const IndexType w = neighbors.template getEntityIndex< -1 >(); if( c * input[ e ] <= 0 ) { output[ cell.getIndex() ] = c >= 0 ? ( h * c )/( c - input[ e ] ) : - ( h * c )/( c - input[ e ] ); pom = TNL::sign( c )*( h * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) output[ e ] = pom; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; } if( c * input[ w ] <=0 ) { output[ cell.getIndex() ] = c >= 0 ? ( h * c )/( c - input[ w ] ) : - ( h * c )/( c - input[ w ] ); interfaceMap[ cell.getIndex() ] = true; } } output[ cell.getIndex() ] = c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() : -TypeInfo< RealType >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; } } Loading @@ -69,8 +96,31 @@ template< typename Real, void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( MeshFunctionType& u, const MeshEntity& cell ) const MeshEntity& cell, const RealType v ) { const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); const MeshType& mesh = cell.getMesh(); const RealType& h = mesh.getSpaceSteps().x(); const RealType value = u( cell ); RealType a, tmp = TypeInfo< RealType >::getMaxValue(); if( cell.getCoordinates().x() == 0 ) a = u[ neighborEntities.template getEntityIndex< 1 >() ]; else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) a = u[ neighborEntities.template getEntityIndex< -1 >() ]; else { a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], u[ neighborEntities.template getEntityIndex< 1 >() ] ); } if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) return; tmp = a + TNL::sign( value ) * h/v; u[ cell.getIndex() ] = argAbsMin( value, tmp ); } template< typename Real, Loading Loading @@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input, } if( std::is_same< Device, Devices::Host >::value ) { const MeshFunctionType& input = _input.getData(); MeshFunctionType input = _input.getData(); double A[320][320]; std::ifstream fileInit("/home/maty/Downloads/initData.txt"); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) fileInit >> A[i][j]; fileInit.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) input[i*320 + j] = A[i][j]; MeshFunctionType& output = _output.modifyData(); InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); const MeshType& mesh = input.getMesh(); Loading Loading @@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] ) } } template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; int xOd = 0; int yOd = 0; int i = threadIdx.x + blockDim.x*blockIdx.x; const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); if( blockIdx == 0 ) xOd = 1; if( blockIdy == 0 ) yOd = 1; if( i < mesh.getDimensions().x() ) { typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.refresh(); const Index cind = cell.getIndex(); if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; output[ cind ] = input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - TypeInfo< Real >::getMaxValue(); interfaceMap[ cind ] = false; if( dimX > (blockIdx+1) * 16 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; } } if( blockIdx != 0 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; } } if( dimY > (blockIdy+1) * 16 ) const Real& h = mesh.getSpaceSteps().x(); cell.refresh(); const Real& c = input( cell ); if( ! cell.isBoundaryEntity() ) { for( int i = xOd; i < xkolik; i++ ) auto neighbors = cell.getNeighborEntities(); Real pom = 0; const Index e = neighbors.template getEntityIndex< 1 >(); const Index w = neighbors.template getEntityIndex< -1 >(); if( c * input[ e ] <= 0 ) { sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; } pom = TNL::sign( c )*( h * c )/( c - input[ e ]); if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) output[ cind ] = pom; interfaceMap[ cind ] = true; } if( blockIdy != 0 ) { for( int i = xOd; i < xkolik; i++ ) if( c * input[ w ] <= 0 ) { sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; pom = TNL::sign( c )*( h * c )/( c - input[ w ]); if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) output[ cind ] = pom; interfaceMap[ cind ] = true; } } } #ifdef HAVE_CUDA } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Loading Loading @@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.001 ) if ( fabs( tmp ) > 0.01*hx ) return true; else return false; Loading @@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01 ) if ( fabs( tmp ) > 0.01*hx ) return true; else return false; Loading @@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con return false; } template< typename Real, typename Device, typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) { const RealType value = sArray[ thri ]; RealType a, tmp = TypeInfo< RealType >::getMaxValue(); a = TNL::argAbsMin( sArray[ thri+1 ], sArray[ thri-1 ] ); if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) return false; tmp = a + TNL::sign( value ) * h/v; sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; if ( fabs( tmp ) > 0.01*h ) return true; else return false; } #endif src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h +184 −7 Original line number Diff line number Diff line Loading @@ -58,13 +58,190 @@ solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) { MeshFunctionPointer aux; InterfaceMapPointer interfaceMap; aux->setMesh( mesh ); interfaceMap->setMesh( mesh ); MeshFunctionPointer auxPtr; InterfaceMapPointer interfaceMapPtr; auxPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); aux->save( "aux-ini.tnl" ); BaseType::initInterface( u, auxPtr, interfaceMapPtr ); auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); IndexType iteration( 0 ); InterfaceMapType interfaceMap = *interfaceMapPtr; MeshFunctionType aux = *auxPtr; while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh->getDimensions().x(); cell.getCoordinates().x()++ ) { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); } for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; cell.getCoordinates().x() >= 0 ; cell.getCoordinates().x()-- ) { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); } } if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr; bool* BlockIter = (bool*)malloc( ( numBlocksX ) * sizeof( bool ) ); bool *BlockIterDevice; cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) ); while( BlockIter[ 0 ] ) { for( int i = 0; i < numBlocksX; i++ ) BlockIter[ i ] = false; cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice); CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost); for( int i = 1; i < numBlocksX; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; } delete[] BlockIter; cudaFree( BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; interfaceMap = *interfaceMapPtr; #endif } iteration++; } aux.save("aux-final.tnl"); } #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ) { int thri = threadIdx.x; int blIdx = blockIdx.x; int i = thri + blockDim.x*blIdx; __shared__ volatile bool changed[16]; changed[ thri ] = false; if( thri == 0 ) changed[ 0 ] = true; const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real h; if( thri == 1 ) { h = mesh.getSpaceSteps().x(); } __shared__ volatile Real sArray[ 18 ]; sArray[thri] = TypeInfo< Real >::getMaxValue(); //filling sArray edges int dimX = mesh.getDimensions().x(); __shared__ volatile int numOfBlockx; __shared__ int xkolik; if( thri == 0 ) { xkolik = blockDim.x + 1; numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*blockDim.x+1; } __syncthreads(); if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x ) sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ]; else sArray[xkolik] = TypeInfo< Real >::getMaxValue(); } if( thri == 1 ) { if( blIdx != 0 ) sArray[0] = aux[ blIdx*blockDim.x - 1 ]; else sArray[0] = TypeInfo< Real >::getMaxValue(); } if( i < mesh.getDimensions().x() ) { sArray[thri+1] = aux[ i ]; } __syncthreads(); while( changed[ 0 ] ) { __syncthreads(); changed[ thri ] = false; //calculation of update cell if( i < mesh.getDimensions().x() ) { if( ! interfaceMap[ i ] ) { changed[ thri ] = ptr.updateCell( sArray, thri+1, h ); } } __syncthreads(); //pyramid reduction if( thri < 8 ) changed[ thri ] = changed[ thri ] || changed[ thri + 8 ]; if( thri < 4 ) changed[ thri ] = changed[ thri ] || changed[ thri + 4 ]; if( thri < 2 ) changed[ thri ] = changed[ thri ] || changed[ thri + 2 ]; if( thri < 1 ) changed[ thri ] = changed[ thri ] || changed[ thri + 1 ]; __syncthreads(); if( changed[ 0 ] && thri == 0 ) BlockIterDevice[ blIdx ] = true; __syncthreads(); } if( i < mesh.getDimensions().x() && (!interfaceMap[ i ]) ) aux[ i ] = sArray[ thri + 1 ]; } #endif src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +36 −31 Original line number Diff line number Diff line Loading @@ -18,13 +18,16 @@ #include <TNL/Devices/Cuda.h> #include <iostream> #include <fstream> template< typename Real, typename Device, typename Index, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) : maxIterations( 2 ) { } Loading Loading @@ -61,15 +64,31 @@ solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) { MeshFunctionType v; v.setMesh(mesh); double A[320][320]; for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) A[i][j] = 0; std::ifstream file("/home/maty/Downloads/mapa2.txt"); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) file >> A[i][j]; file.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) v[i*320 + j] = A[i][j]; v.save("mapa.tnl"); MeshFunctionPointer auxPtr; InterfaceMapPointer interfaceMapPtr; auxPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); #ifdef HAVE_CUDA cudaDeviceSynchronize(); #endif auxPtr->save( "aux-ini.tnl" ); Loading @@ -92,7 +111,7 @@ solve( const MeshPointer& mesh, { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -109,7 +128,7 @@ solve( const MeshPointer& mesh, //std::cerr << "2 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -126,7 +145,7 @@ solve( const MeshPointer& mesh, //std::cerr << "3 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -143,7 +162,7 @@ solve( const MeshPointer& mesh, //std::cerr << "4 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading Loading @@ -222,7 +241,6 @@ solve( const MeshPointer& mesh, int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; Loading @@ -243,20 +261,8 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost); /*for( int j = numBlocksY-1; j > -1; j-- ) { for( int i = 0; i < numBlocksX; i++ ) { std::cout << BlockIter[ j * numBlocksY + i ] << "\t"; } std::cout << std::endl; } std::cout << std::endl;*/ for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; Loading Loading @@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 0 && thri == 0 ) changed[ 0 ] = true; __syncthreads(); const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; __shared__ Real hy; if( thrj == 0 && thri == 0 ) if( thrj == 1 && thri == 1 ) { hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); Loading Loading @@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue(); } if( thrj == 0 ) if( thri == 2 ) { if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) sArray[ykolik][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thri+1 ]; sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; else sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue(); sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue(); } if( thrj == 1 ) if( thri == 3 ) { if( blIdy != 0 && thri+1 < xkolik ) sArray[0][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thri+1 ]; sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; else sArray[0][thri+1] = TypeInfo< Real >::getMaxValue(); sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue(); } Loading Loading @@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } if( changed[ 0 ] && thri == 0 && thrj == 0 ) BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ]; BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true; __syncthreads(); } Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h +1 −1 File changed.Contains only whitespace changes. Show changes Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +16 −7 Original line number Diff line number Diff line Loading @@ -40,8 +40,12 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > template< typename MeshEntity > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell ); const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ bool updateCell( volatile Real sArray[18], int thri, const Real h, const Real velocity = 1.0 ); }; Loading Loading @@ -72,12 +76,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j ); /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j );*/ }; template< typename Real, Loading Loading @@ -113,6 +111,17 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +183 −95 Original line number Diff line number Diff line Loading @@ -20,6 +20,25 @@ tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, MeshFunctionPointer& _output, InterfaceMapPointer& _interfaceMap ) { if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); Devices::Cuda::synchronizeDevice(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >() ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { const MeshType& mesh = _input->getMesh(); typedef typename MeshType::Cell Cell; Loading @@ -27,7 +46,20 @@ initInterface( const MeshFunctionPointer& _input, MeshFunctionType& output = _output.modifyData(); InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); Cell cell( mesh ); for( cell.getCoordinates().x() = 1; for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh.getDimensions().x(); cell.getCoordinates().x() ++ ) { cell.refresh(); output[ cell.getIndex() ] = input( cell ) >= 0 ? TypeInfo< RealType >::getMaxValue() : - TypeInfo< RealType >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; } const RealType& h = mesh.getSpaceSteps().x(); for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh.getDimensions().x() - 1; cell.getCoordinates().x() ++ ) { Loading @@ -36,29 +68,24 @@ initInterface( const MeshFunctionPointer& _input, if( ! cell.isBoundaryEntity() ) { const auto& neighbors = cell.getNeighborEntities(); const RealType& h = mesh.getSpaceSteps().x(); Real pom = 0; //const IndexType& c = cell.getIndex(); const IndexType e = neighbors.template getEntityIndex< 1 >(); const IndexType w = neighbors.template getEntityIndex< -1 >(); if( c * input[ e ] <= 0 ) { output[ cell.getIndex() ] = c >= 0 ? ( h * c )/( c - input[ e ] ) : - ( h * c )/( c - input[ e ] ); pom = TNL::sign( c )*( h * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) output[ e ] = pom; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; } if( c * input[ w ] <=0 ) { output[ cell.getIndex() ] = c >= 0 ? ( h * c )/( c - input[ w ] ) : - ( h * c )/( c - input[ w ] ); interfaceMap[ cell.getIndex() ] = true; } } output[ cell.getIndex() ] = c > 0 ? TNL::TypeInfo< RealType >::getMaxValue() : -TypeInfo< RealType >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; } } Loading @@ -69,8 +96,31 @@ template< typename Real, void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( MeshFunctionType& u, const MeshEntity& cell ) const MeshEntity& cell, const RealType v ) { const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); const MeshType& mesh = cell.getMesh(); const RealType& h = mesh.getSpaceSteps().x(); const RealType value = u( cell ); RealType a, tmp = TypeInfo< RealType >::getMaxValue(); if( cell.getCoordinates().x() == 0 ) a = u[ neighborEntities.template getEntityIndex< 1 >() ]; else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) a = u[ neighborEntities.template getEntityIndex< -1 >() ]; else { a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], u[ neighborEntities.template getEntityIndex< 1 >() ] ); } if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) return; tmp = a + TNL::sign( value ) * h/v; u[ cell.getIndex() ] = argAbsMin( value, tmp ); } template< typename Real, Loading Loading @@ -103,7 +153,20 @@ initInterface( const MeshFunctionPointer& _input, } if( std::is_same< Device, Devices::Host >::value ) { const MeshFunctionType& input = _input.getData(); MeshFunctionType input = _input.getData(); double A[320][320]; std::ifstream fileInit("/home/maty/Downloads/initData.txt"); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) fileInit >> A[i][j]; fileInit.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) input[i*320 + j] = A[i][j]; MeshFunctionType& output = _output.modifyData(); InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); const MeshType& mesh = input.getMesh(); Loading Loading @@ -632,64 +695,60 @@ __cuda_callable__ void sortMinims( T1 pom[] ) } } template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; int xOd = 0; int yOd = 0; int i = threadIdx.x + blockDim.x*blockIdx.x; const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); if( blockIdx == 0 ) xOd = 1; if( blockIdy == 0 ) yOd = 1; if( i < mesh.getDimensions().x() ) { typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.refresh(); const Index cind = cell.getIndex(); if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; output[ cind ] = input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - TypeInfo< Real >::getMaxValue(); interfaceMap[ cind ] = false; if( dimX > (blockIdx+1) * 16 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; } } if( blockIdx != 0 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; } } if( dimY > (blockIdy+1) * 16 ) const Real& h = mesh.getSpaceSteps().x(); cell.refresh(); const Real& c = input( cell ); if( ! cell.isBoundaryEntity() ) { for( int i = xOd; i < xkolik; i++ ) auto neighbors = cell.getNeighborEntities(); Real pom = 0; const Index e = neighbors.template getEntityIndex< 1 >(); const Index w = neighbors.template getEntityIndex< -1 >(); if( c * input[ e ] <= 0 ) { sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; } pom = TNL::sign( c )*( h * c )/( c - input[ e ]); if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) output[ cind ] = pom; interfaceMap[ cind ] = true; } if( blockIdy != 0 ) { for( int i = xOd; i < xkolik; i++ ) if( c * input[ w ] <= 0 ) { sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; pom = TNL::sign( c )*( h * c )/( c - input[ w ]); if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) output[ cind ] = pom; interfaceMap[ cind ] = true; } } } #ifdef HAVE_CUDA } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Loading Loading @@ -886,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.001 ) if ( fabs( tmp ) > 0.01*hx ) return true; else return false; Loading @@ -898,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01 ) if ( fabs( tmp ) > 0.01*hx ) return true; else return false; Loading @@ -906,4 +965,33 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con return false; } template< typename Real, typename Device, typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) { const RealType value = sArray[ thri ]; RealType a, tmp = TypeInfo< RealType >::getMaxValue(); a = TNL::argAbsMin( sArray[ thri+1 ], sArray[ thri-1 ] ); if( fabs( a ) == TypeInfo< RealType >::getMaxValue() ) return false; tmp = a + TNL::sign( value ) * h/v; sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; if ( fabs( tmp ) > 0.01*h ) return true; else return false; } #endif
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h +184 −7 Original line number Diff line number Diff line Loading @@ -58,13 +58,190 @@ solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) { MeshFunctionPointer aux; InterfaceMapPointer interfaceMap; aux->setMesh( mesh ); interfaceMap->setMesh( mesh ); MeshFunctionPointer auxPtr; InterfaceMapPointer interfaceMapPtr; auxPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); aux->save( "aux-ini.tnl" ); BaseType::initInterface( u, auxPtr, interfaceMapPtr ); auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); IndexType iteration( 0 ); InterfaceMapType interfaceMap = *interfaceMapPtr; MeshFunctionType aux = *auxPtr; while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh->getDimensions().x(); cell.getCoordinates().x()++ ) { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); } for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; cell.getCoordinates().x() >= 0 ; cell.getCoordinates().x()-- ) { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); } } if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr; bool* BlockIter = (bool*)malloc( ( numBlocksX ) * sizeof( bool ) ); bool *BlockIterDevice; cudaMalloc(&BlockIterDevice, ( numBlocksX ) * sizeof( bool ) ); while( BlockIter[ 0 ] ) { for( int i = 0; i < numBlocksX; i++ ) BlockIter[ i ] = false; cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX ) * sizeof( bool ), cudaMemcpyHostToDevice); CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX ) * sizeof( bool ), cudaMemcpyDeviceToHost); for( int i = 1; i < numBlocksX; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; } delete[] BlockIter; cudaFree( BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; interfaceMap = *interfaceMapPtr; #endif } iteration++; } aux.save("aux-final.tnl"); } #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ) { int thri = threadIdx.x; int blIdx = blockIdx.x; int i = thri + blockDim.x*blIdx; __shared__ volatile bool changed[16]; changed[ thri ] = false; if( thri == 0 ) changed[ 0 ] = true; const Meshes::Grid< 1, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real h; if( thri == 1 ) { h = mesh.getSpaceSteps().x(); } __shared__ volatile Real sArray[ 18 ]; sArray[thri] = TypeInfo< Real >::getMaxValue(); //filling sArray edges int dimX = mesh.getDimensions().x(); __shared__ volatile int numOfBlockx; __shared__ int xkolik; if( thri == 0 ) { xkolik = blockDim.x + 1; numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*blockDim.x+1; } __syncthreads(); if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x ) sArray[xkolik] = aux[ blIdx*blockDim.x - 1 + xkolik ]; else sArray[xkolik] = TypeInfo< Real >::getMaxValue(); } if( thri == 1 ) { if( blIdx != 0 ) sArray[0] = aux[ blIdx*blockDim.x - 1 ]; else sArray[0] = TypeInfo< Real >::getMaxValue(); } if( i < mesh.getDimensions().x() ) { sArray[thri+1] = aux[ i ]; } __syncthreads(); while( changed[ 0 ] ) { __syncthreads(); changed[ thri ] = false; //calculation of update cell if( i < mesh.getDimensions().x() ) { if( ! interfaceMap[ i ] ) { changed[ thri ] = ptr.updateCell( sArray, thri+1, h ); } } __syncthreads(); //pyramid reduction if( thri < 8 ) changed[ thri ] = changed[ thri ] || changed[ thri + 8 ]; if( thri < 4 ) changed[ thri ] = changed[ thri ] || changed[ thri + 4 ]; if( thri < 2 ) changed[ thri ] = changed[ thri ] || changed[ thri + 2 ]; if( thri < 1 ) changed[ thri ] = changed[ thri ] || changed[ thri + 1 ]; __syncthreads(); if( changed[ 0 ] && thri == 0 ) BlockIterDevice[ blIdx ] = true; __syncthreads(); } if( i < mesh.getDimensions().x() && (!interfaceMap[ i ]) ) aux[ i ] = sArray[ thri + 1 ]; } #endif
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +36 −31 Original line number Diff line number Diff line Loading @@ -18,13 +18,16 @@ #include <TNL/Devices/Cuda.h> #include <iostream> #include <fstream> template< typename Real, typename Device, typename Index, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) : maxIterations( 2 ) { } Loading Loading @@ -61,15 +64,31 @@ solve( const MeshPointer& mesh, const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ) { MeshFunctionType v; v.setMesh(mesh); double A[320][320]; for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) A[i][j] = 0; std::ifstream file("/home/maty/Downloads/mapa2.txt"); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) file >> A[i][j]; file.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) v[i*320 + j] = A[i][j]; v.save("mapa.tnl"); MeshFunctionPointer auxPtr; InterfaceMapPointer interfaceMapPtr; auxPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); #ifdef HAVE_CUDA cudaDeviceSynchronize(); #endif auxPtr->save( "aux-ini.tnl" ); Loading @@ -92,7 +111,7 @@ solve( const MeshPointer& mesh, { cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -109,7 +128,7 @@ solve( const MeshPointer& mesh, //std::cerr << "2 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -126,7 +145,7 @@ solve( const MeshPointer& mesh, //std::cerr << "3 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading @@ -143,7 +162,7 @@ solve( const MeshPointer& mesh, //std::cerr << "4 -> "; cell.refresh(); if( ! interfaceMap( cell ) ) this->updateCell( aux, cell ); this->updateCell( aux, cell, v( cell ) ); } } Loading Loading @@ -222,7 +241,6 @@ solve( const MeshPointer& mesh, int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; Loading @@ -243,20 +261,8 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost); /*for( int j = numBlocksY-1; j > -1; j-- ) { for( int i = 0; i < numBlocksX; i++ ) { std::cout << BlockIter[ j * numBlocksY + i ] << "\t"; } std::cout << std::endl; } std::cout << std::endl;*/ for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; Loading Loading @@ -294,12 +300,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 0 && thri == 0 ) changed[ 0 ] = true; __syncthreads(); const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; __shared__ Real hy; if( thrj == 0 && thri == 0 ) if( thrj == 1 && thri == 1 ) { hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); Loading Loading @@ -345,20 +350,20 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue(); } if( thrj == 0 ) if( thri == 2 ) { if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) sArray[ykolik][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thri+1 ]; sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; else sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue(); sArray[ykolik][thrj+1] = TypeInfo< Real >::getMaxValue(); } if( thrj == 1 ) if( thri == 3 ) { if( blIdy != 0 && thri+1 < xkolik ) sArray[0][thri+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thri+1 ]; sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; else sArray[0][thri+1] = TypeInfo< Real >::getMaxValue(); sArray[0][thrj+1] = TypeInfo< Real >::getMaxValue(); } Loading Loading @@ -422,7 +427,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } if( changed[ 0 ] && thri == 0 && thrj == 0 ) BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ]; BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = true; __syncthreads(); } Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalProblem_impl.h +1 −1 File changed.Contains only whitespace changes. Show changes