From b387101c185fb66d078734baa16e46afb5c713f1 Mon Sep 17 00:00:00 2001 From: fencl <fenclmat@fjfi.cvut.cz> Date: Tue, 19 Jun 2018 08:49:04 +0200 Subject: [PATCH] Mistake found and repaied from 2D. Problem with EOC appeared. --- .../tnlDirectEikonalMethodsBase.h | 4 +- .../tnlDirectEikonalMethodsBase_impl.h | 30 ++-- .../tnlFastSweepingMethod2D_impl.h | 139 ++++++------------ 3 files changed, 59 insertions(+), 114 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index fe83b61c07..979c70e7b5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -69,7 +69,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const MeshEntity& cell, const RealType velocity = 1.0 ); - __cuda_callable__ double updateCell( volatile Real sArray[18][18], + __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); @@ -117,7 +117,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - Real *BlockIterDevice ); + bool *BlockIterDevice ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 098dace4b2..9ec95c25f6 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -859,22 +859,12 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -double +bool tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real v ) { - - //const Meshes::Grid< 2, Real, Device, Index >& mesh = u.template getMesh< Devices::Cuda >(); - /*typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; - cell.refresh(); - - const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y();*/ - const RealType value = sArray[ thrj ][ thri ];//u( cell ); + const RealType value = sArray[ thrj ][ thri ]; RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], @@ -885,7 +875,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con if( fabs( a ) == TypeInfo< RealType >::getMaxValue() && fabs( b ) == TypeInfo< RealType >::getMaxValue() ) - return 0; + return false; RealType pom[6] = { a, b, TypeInfo< Real >::getMaxValue(), (RealType)hx, (RealType)hy, 0.0 }; sortMinims( pom ); @@ -896,10 +886,10 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.1 ) - return 10; + if ( fabs( tmp ) > 0.001 ) + return true; else - return 0; + return false; } else { @@ -908,12 +898,12 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.1 ) - return 10; + if ( fabs( tmp ) > 0.01 ) + return true; else - return 0; + return false; } - return 0; + return false; } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index e24201936c..e727104086 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -227,46 +227,38 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - /*int nBlockIter = numBlocksX > numBlocksY ? numBlocksX : numBlocksY; - nBlockIter = numBlocksX == numBlocksY ? nBlockIter + 1 : nBlockIter;*/ - bool isNotDone = true; numBlocksX = 16; numBlocksY = 16; - double* BlockIter = (double*)malloc( ( numBlocksX * numBlocksY ) * sizeof( double ) ); + bool* BlockIter = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); - double *BlockIterDevice; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( double ) ); + bool *BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ) ); - //while( isNotDone ) + while( BlockIter[ 0 ] ) { for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ i ] = 0.0; - cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( double ), cudaMemcpyHostToDevice); - - isNotDone = false; - + BlockIter[ i ] = false; + cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyHostToDevice); + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( double ), cudaMemcpyDeviceToHost); + cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( bool ), cudaMemcpyDeviceToHost); - for( int j = numBlocksY-1; j > -1; j-- ) + /*for( int j = numBlocksY-1; j > -1; j-- ) { for( int i = 0; i < numBlocksX; i++ ) { - //if( BlockIter[ j * numBlocksX + i ] ) - std::cout << BlockIter[ j * numBlocksY + i ] << "\t"; - //else - // std::cout << "0" << " "; + std::cout << BlockIter[ j * numBlocksY + i ] << "\t"; } std::cout << std::endl; } - std::cout << std::endl; + std::cout << std::endl;*/ - for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - isNotDone = isNotDone || BlockIter[ i ]; + for( int i = 1; i < numBlocksX * numBlocksY; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ]; } delete[] BlockIter; @@ -289,7 +281,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - Real *BlockIterDevice ) + bool *BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; @@ -297,15 +289,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< int j = blockDim.y*blIdy + thrj; int currentIndex = thrj * 16 + thri; - __shared__ volatile Real changed[256]; - changed[ currentIndex ] = 0.0; - __syncthreads(); + __shared__ volatile bool changed[256]; + changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 ) - changed[ 0 ] = 10.0; + changed[ 0 ] = true; __syncthreads(); - //SmallCycleBoolin = true; - const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; @@ -315,11 +304,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); } - __syncthreads(); __shared__ volatile Real sArray[ 18 ][ 18 ]; sArray[thrj][thri] = TypeInfo< Real >::getMaxValue(); - __syncthreads(); //filling sArray edges int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); @@ -339,12 +326,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*blockDim.y+1; - - //filling BlockIterDevice - //BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0.0; - } - /*if( blIdx == 2 && blIdy == 3 ) - BlockIterDevice[ currentIndex ] = 0.0;*/ + } __syncthreads(); if( thri == 0 ) @@ -354,7 +336,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< else sArray[thrj+1][xkolik] = TypeInfo< Real >::getMaxValue(); } - __syncthreads(); if( thri == 1 ) { @@ -363,7 +344,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< else sArray[thrj+1][0] = TypeInfo< Real >::getMaxValue(); } - __syncthreads(); if( thrj == 0 ) { @@ -372,7 +352,6 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< else sArray[ykolik][thri+1] = TypeInfo< Real >::getMaxValue(); } - __syncthreads(); if( thrj == 1 ) { @@ -381,44 +360,47 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< else sArray[0][thri+1] = TypeInfo< Real >::getMaxValue(); } - __syncthreads(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - __syncthreads(); - } - - int loopcounter; - loopcounter = 0; - __syncthreads(); - - /*if( ( blIdx == 4 && blIdy == 5 ) || ( blIdx == 2 && blIdy == 2 ) || ( blIdx == 2 && blIdy == 3 ) || - ( blIdx == 5 && blIdy == 5 ) || ( blIdx == 6 && blIdy == 5 ) || ( blIdx == 5 && blIdy == 4 ) || - ( blIdx == 5 && blIdy == 2 ) || ( blIdx == 3 && blIdy == 2 ) || ( blIdx == 4 && blIdy == 2 ) )*/ - while( changed[ 0 ] > 0.1 ) + } + __syncthreads(); + + while( changed[ 0 ] ) { - changed[ currentIndex] = 0.0; __syncthreads(); + changed[ currentIndex] = false; + //calculation of update cell if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx, hy ); - __syncthreads(); + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx, hy ); } } __syncthreads(); + /*if( thri == 0 && thrj == 0 && blIdx == 1 && blIdy == 3 ){ + for( int h = 15; h > -1; h-- ){ + for( int g = 0; g < 16; g++ ){ + printf( "%d\t", changed[h*16+g] ); + } + printf("\n"); + } + printf("\n"); + } + __syncthreads();*/ + //pyramid reduction if( blockDim.x*blockDim.y >= 256 ) { if( currentIndex < 128 ) { - changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 128 ]; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; } } __syncthreads(); @@ -426,52 +408,25 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { if( currentIndex < 64 ) { - changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 64 ]; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; } } __syncthreads(); if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] + changed[ currentIndex + 1 ]; + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } + if( changed[ 0 ] && thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = changed[ 0 ]; __syncthreads(); - - - - loopcounter++; - __syncthreads(); - if( currentIndex != 0 ) - changed[ currentIndex ] = 0.0; - __syncthreads(); - - if( loopcounter > 200 ) - changed[ currentIndex ] = 0.0; - __syncthreads(); - } - /*__syncthreads(); - if( blIdy == 3 && blIdx == 1 ) - { - BlockIterDevice[ currentIndex ] = changed[ currentIndex ]; - BlockIterDevice[ 0 ] = loopcounter; - }*/ - __syncthreads(); - if( blIdx == 2 && blIdy == 3 ) - { - //SmallCycleBoolin = changed[ 0 ]; - //if( changed[ 0 ] ) - BlockIterDevice[ currentIndex ] = changed[ currentIndex ];// loopcounter; - BlockIterDevice[ 0 ] = loopcounter; - } - - + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; - __syncthreads(); } #endif -- GitLab