Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +10 −8 Original line number Diff line number Diff line Loading @@ -74,12 +74,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ bool updateCell( volatile Real sArray[18][18], template< int sizeSArray > __cuda_callable__ bool updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, ArrayContainer BlockIterHost, int numThreadsPerBlock ); MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; Loading Loading @@ -119,9 +123,6 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); template < typename Index > void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Loading @@ -134,11 +135,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); template < typename Real, typename Device, typename Index > template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +1036 −1009 Original line number Diff line number Diff line Loading @@ -92,12 +92,15 @@ initInterface( const MeshFunctionPointer& _input, template< typename Real, typename Device, typename Index > template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, ArrayContainer BlockIterHost, int numThreadsPerBlock ) MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { #pragma omp parallel for schedule( dynamic ) for( int i = 0; i < BlockIterHost.getSize(); i++ ) { if( BlockIterHost[ i ] ) Loading @@ -105,19 +108,23 @@ updateBlocks( InterfaceMapType interfaceMap, MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; int xkolik = numThreadsPerBlock + 1; int ykolik = numThreadsPerBlock + 1; int blIdx = i%numOfBlockx; int blIdy = i/numOfBlocky; int blIdy = i/numOfBlockx; //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*numThreadsPerBlock+1; if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*numThreadsPerBlock+1; //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; Loading @@ -125,114 +132,133 @@ updateBlocks( InterfaceMapType interfaceMap, Real hx = mesh.getSpaceSteps().x(); Real hy = mesh.getSpaceSteps().y(); Real changed1[ 16*16 ]; /*Real changed2[ 16*16 ]; Real changed3[ 16*16 ]; Real changed4[ 16*16 ];*/ Real sArray[18][18]; bool changed = false; for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) sArray[thrj][thri] = std::numeric_limits< Real >::max(); RealType *sArray; sArray = new Real[ sizeSArray * sizeSArray ]; if( sArray == nullptr ) std::cout << "Error while allocating memory for sArray." << std::endl; for( int thri = 0; thri < sizeSArray; thri++ ){ for( int thrj = 0; thrj < sizeSArray; thrj++ ) sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); } BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; else sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; if( blIdx != 0 && thrj+1 < ykolik ) sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; else sArray[thrj+1][0] = std::numeric_limits< Real >::max(); sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; else sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; if( blIdy != 0 && thrj+1 < xkolik ) sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; else sArray[0][thrj+1] = std::numeric_limits< Real >::max(); sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; } for( int k = 0; k < numThreadsPerBlock; k++ ) for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; for( int k = 0; k < numThreadsPerBlock; k++ ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; } bool pom = false; for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ changed1[ k*numThreadsPerBlock + l ] = 0; /*changed2[ k*numThreadsPerBlock + l ] = 0; changed3[ k*numThreadsPerBlock + l ] = 0; changed4[ k*numThreadsPerBlock + l ] = 0;*/ if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); pom = this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); changed = changed || pom; } } } } /*aux.save( "aux-1pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } /*aux.save( "aux-2pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } /*aux.save( "aux-3pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = numThreadsPerBlock-1; l >-1; l-- ){ changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ } /*aux.save( "aux-4pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) if( changed ){ BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; } for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && (!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout<<std::endl; } //delete []sArray; } } } Loading @@ -249,27 +275,27 @@ getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) for(int i = 0; i < numBlockX * numBlockY; i++) { BlockIterPom[ i ] = 0; if( BlockIterHost[ i ] ) { // i = k*numBlockY + m; BlockIterPom[ i ] = 0;//BlockIterPom[ i ] = 0; int m=0, k=0; m = i%numBlockX; k = i/numBlockX; if( k > 0 ) BlockIterPom[i - numBlockX] = 1; if( k < numBlockY - 1 ) BlockIterPom[i + numBlockX] = 1; if( m < numBlockX - 1 ) BlockIterPom[ i+1 ] = 1; if( m > 0 ) BlockIterPom[ i-1 ] = 1; if( m > 0 && BlockIterHost[ i - 1 ] ){ BlockIterPom[ i ] = 1; }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ BlockIterPom[ i ] = 1; }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ BlockIterPom[ i ] = 1; }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ BlockIterPom[ i ] = 1; } //BlockIterPom[ i ]; } for(int i = 0; i < numBlockX * numBlockY; i++) //if( !BlockIter[ i ] ) { if( !BlockIterHost[ i ] ) BlockIterHost[ i ] = BlockIterPom[ i ]; } /*else BlockIter[ i ] = 0;*/ /*for( int i = numBlockX-1; i > -1; i-- ) Loading Loading @@ -353,11 +379,11 @@ initInterface( const MeshFunctionPointer& _input, for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) fileInit >> A[i][j]; fileInit >> A[j]; fileInit.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) input[i*320 + j] = A[i][j];*/ input[i*320 + j] = A[j];*/ MeshFunctionType& output = _output.modifyData(); Loading Loading @@ -1110,20 +1136,21 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 template< typename Real, typename Device, typename Index > template< int sizeSArray > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real v ) { const RealType value = sArray[ thrj ][ thri ]; const RealType value = sArray[ thrj * sizeSArray + thri ]; RealType a, b, tmp = std::numeric_limits< RealType >::max(); b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], sArray[ thrj-1 ][ thri ] ); b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], sArray[ (thrj-1) * sizeSArray + thri ] ); a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], sArray[ thrj ][ thri-1 ] ); a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], sArray[ thrj * sizeSArray + thri-1 ] ); if( fabs( a ) == std::numeric_limits< RealType >::max() && fabs( b ) == std::numeric_limits< RealType >::max() ) Loading @@ -1136,8 +1163,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con if( fabs( tmp ) < fabs( pom[ 1 ] ) ) { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj * sizeSArray + thri ]; if ( fabs( tmp ) > 0.001*hx ) return true; else Loading @@ -1148,8 +1175,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj * sizeSArray + thri ]; if ( fabs( tmp ) > 0.001*hx ) return true; else Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +394 −226 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +10 −8 Original line number Diff line number Diff line Loading @@ -74,12 +74,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ bool updateCell( volatile Real sArray[18][18], template< int sizeSArray > __cuda_callable__ bool updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, ArrayContainer BlockIterHost, int numThreadsPerBlock ); MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; Loading Loading @@ -119,9 +123,6 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); template < typename Index > void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, Loading @@ -134,11 +135,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); template < typename Real, typename Device, typename Index > template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +1036 −1009 Original line number Diff line number Diff line Loading @@ -92,12 +92,15 @@ initInterface( const MeshFunctionPointer& _input, template< typename Real, typename Device, typename Index > template< int sizeSArray > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, ArrayContainer BlockIterHost, int numThreadsPerBlock ) MeshFunctionType helpFunc, ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { #pragma omp parallel for schedule( dynamic ) for( int i = 0; i < BlockIterHost.getSize(); i++ ) { if( BlockIterHost[ i ] ) Loading @@ -105,19 +108,23 @@ updateBlocks( InterfaceMapType interfaceMap, MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; int xkolik = numThreadsPerBlock + 1; int ykolik = numThreadsPerBlock + 1; int blIdx = i%numOfBlockx; int blIdy = i/numOfBlocky; int blIdy = i/numOfBlockx; //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*numThreadsPerBlock+1; if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*numThreadsPerBlock+1; //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; Loading @@ -125,114 +132,133 @@ updateBlocks( InterfaceMapType interfaceMap, Real hx = mesh.getSpaceSteps().x(); Real hy = mesh.getSpaceSteps().y(); Real changed1[ 16*16 ]; /*Real changed2[ 16*16 ]; Real changed3[ 16*16 ]; Real changed4[ 16*16 ];*/ Real sArray[18][18]; bool changed = false; for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) sArray[thrj][thri] = std::numeric_limits< Real >::max(); RealType *sArray; sArray = new Real[ sizeSArray * sizeSArray ]; if( sArray == nullptr ) std::cout << "Error while allocating memory for sArray." << std::endl; for( int thri = 0; thri < sizeSArray; thri++ ){ for( int thrj = 0; thrj < sizeSArray; thrj++ ) sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); } BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; else sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; if( blIdx != 0 && thrj+1 < ykolik ) sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; else sArray[thrj+1][0] = std::numeric_limits< Real >::max(); sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; else sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; if( blIdy != 0 && thrj+1 < xkolik ) sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; else sArray[0][thrj+1] = std::numeric_limits< Real >::max(); sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; } for( int k = 0; k < numThreadsPerBlock; k++ ) for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; for( int k = 0; k < numThreadsPerBlock; k++ ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; } bool pom = false; for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ changed1[ k*numThreadsPerBlock + l ] = 0; /*changed2[ k*numThreadsPerBlock + l ] = 0; changed3[ k*numThreadsPerBlock + l ] = 0; changed4[ k*numThreadsPerBlock + l ] = 0;*/ if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); pom = this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); changed = changed || pom; } } } } /*aux.save( "aux-1pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } /*aux.save( "aux-2pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } /*aux.save( "aux-3pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); this->updateCell<sizeSArray>( sArray/*[i]*/, l+1, k+1, hx,hy); } } } for( int k = numThreadsPerBlock-1; k > -1; k-- ) for( int l = numThreadsPerBlock-1; l >-1; l-- ){ changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ } /*aux.save( "aux-4pruch.tnl" ); for( int k = 0; k < sizeSArray; k++ ){ for( int l = 0; l < sizeSArray; l++ ) { std::cout << sArray[ k * sizeSArray + l] << " "; } std::cout << std::endl; }*/ if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) if( changed ){ BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; } for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && (!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout<<std::endl; } //delete []sArray; } } } Loading @@ -249,27 +275,27 @@ getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) for(int i = 0; i < numBlockX * numBlockY; i++) { BlockIterPom[ i ] = 0; if( BlockIterHost[ i ] ) { // i = k*numBlockY + m; BlockIterPom[ i ] = 0;//BlockIterPom[ i ] = 0; int m=0, k=0; m = i%numBlockX; k = i/numBlockX; if( k > 0 ) BlockIterPom[i - numBlockX] = 1; if( k < numBlockY - 1 ) BlockIterPom[i + numBlockX] = 1; if( m < numBlockX - 1 ) BlockIterPom[ i+1 ] = 1; if( m > 0 ) BlockIterPom[ i-1 ] = 1; if( m > 0 && BlockIterHost[ i - 1 ] ){ BlockIterPom[ i ] = 1; }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ BlockIterPom[ i ] = 1; }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ BlockIterPom[ i ] = 1; }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ BlockIterPom[ i ] = 1; } //BlockIterPom[ i ]; } for(int i = 0; i < numBlockX * numBlockY; i++) //if( !BlockIter[ i ] ) { if( !BlockIterHost[ i ] ) BlockIterHost[ i ] = BlockIterPom[ i ]; } /*else BlockIter[ i ] = 0;*/ /*for( int i = numBlockX-1; i > -1; i-- ) Loading Loading @@ -353,11 +379,11 @@ initInterface( const MeshFunctionPointer& _input, for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) fileInit >> A[i][j]; fileInit >> A[j]; fileInit.close(); for (int i = 0; i < 320; i++) for (int j = 0; j < 320; j++) input[i*320 + j] = A[i][j];*/ input[i*320 + j] = A[j];*/ MeshFunctionType& output = _output.modifyData(); Loading Loading @@ -1110,20 +1136,21 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 template< typename Real, typename Device, typename Index > template< int sizeSArray > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real v ) { const RealType value = sArray[ thrj ][ thri ]; const RealType value = sArray[ thrj * sizeSArray + thri ]; RealType a, b, tmp = std::numeric_limits< RealType >::max(); b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], sArray[ thrj-1 ][ thri ] ); b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], sArray[ (thrj-1) * sizeSArray + thri ] ); a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], sArray[ thrj ][ thri-1 ] ); a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], sArray[ thrj * sizeSArray + thri-1 ] ); if( fabs( a ) == std::numeric_limits< RealType >::max() && fabs( b ) == std::numeric_limits< RealType >::max() ) Loading @@ -1136,8 +1163,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con if( fabs( tmp ) < fabs( pom[ 1 ] ) ) { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj * sizeSArray + thri ]; if ( fabs( tmp ) > 0.001*hx ) return true; else Loading @@ -1148,8 +1175,8 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj * sizeSArray + thri ]; if ( fabs( tmp ) > 0.001*hx ) return true; else Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +394 −226 File changed.Preview size limit exceeded, changes collapsed. Show changes