From f5c32276c88cf7bd2ca5bc45b3bfc06768486275 Mon Sep 17 00:00:00 2001 From: Fencl <fenclmat@fjfi.cvut.cz> Date: Thu, 4 Oct 2018 19:30:14 +0200 Subject: [PATCH] Chess model implemented in 2D. --- .../tnlDirectEikonalMethodsBase.h | 8 +- .../tnlDirectEikonalMethodsBase_impl.h | 12 +- .../tnlFastSweepingMethod2D_impl.h | 212 ++++++++++-------- 3 files changed, 124 insertions(+), 108 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index b981a92a8c..eb7cbd2a52 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int oddEvenBlock); __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); -template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ); +/*template < typename Real, typename Device, typename Index > +__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 649a5ad43a..cfea6aca0c 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; - if ( fabs( tmp ) > 0.01*h ) + if ( fabs( tmp ) > 0.001*h ) return true; else return false; @@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 6703843c17..7e4028fbe9 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -26,7 +26,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 100 ) +: maxIterations( 1 ) { } @@ -250,7 +250,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); + //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); @@ -261,7 +261,7 @@ solve( const MeshPointer& mesh, int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); int *dBlock; cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - + int oddEvenBlock = 0; while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) @@ -269,19 +269,30 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), - dAux, - BlockIterDevice ); + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - + TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); + //aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); cudaFree( dAux ); cudaFree( BlockIterDevice ); cudaFree( dBlock ); @@ -299,7 +310,7 @@ solve( const MeshPointer& mesh, } #ifdef HAVE_CUDA -template < typename Real, typename Device, typename Index > +/*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) { int i = threadIdx.x + blockDim.x*blockIdx.x; @@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; } -} +}*/ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) { @@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice ) + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; @@ -417,109 +428,114 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } __syncthreads(); - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) - { - if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); - } - - if( thri == 2 ) - { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( thri == 3 ) + if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) { - if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - } - __syncthreads(); + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) + sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + } + + if( thri == 1 ) + { + if( blIdx != 0 && thrj+1 < ykolik ) + sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + } + + if( thri == 2 ) + { + if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + } + + if( thri == 3 ) + { + if( blIdy != 0 && thrj+1 < xkolik ) + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + - while( changed[ 0 ] ) - { - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + } + __syncthreads(); + + while( changed[ 0 ] ) { - if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) + { + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + } } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y == 1024 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + if( currentIndex < 512 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 512 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + if( currentIndex < 256 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 256 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + if( currentIndex < 128 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 128 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + if( currentIndex < 64 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + } } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + if( changed[ 0 ] && thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; + __syncthreads(); } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - __syncthreads(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) + aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; + } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; } #endif -- GitLab