From f5c32276c88cf7bd2ca5bc45b3bfc06768486275 Mon Sep 17 00:00:00 2001 From: Fencl Date: Thu, 4 Oct 2018 19:30:14 +0200 Subject: [PATCH 01/20] Chess model implemented in 2D. --- .../tnlDirectEikonalMethodsBase.h | 8 +- .../tnlDirectEikonalMethodsBase_impl.h | 12 +- .../tnlFastSweepingMethod2D_impl.h | 212 ++++++++++-------- 3 files changed, 124 insertions(+), 108 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index b981a92a8..eb7cbd2a5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int oddEvenBlock); __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); -template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ); +/*template < typename Real, typename Device, typename Index > +__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 649a5ad43..cfea6aca0 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; - if ( fabs( tmp ) > 0.01*h ) + if ( fabs( tmp ) > 0.001*h ) return true; else return false; @@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1059,7 +1059,7 @@ updateCell( 
volatile Real sArray[10][10][10], int thri, int thrj, int thrk, hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 6703843c1..7e4028fbe 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -26,7 +26,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 100 ) +: maxIterations( 1 ) { } @@ -250,7 +250,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); + //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); @@ -261,7 +261,7 @@ solve( const MeshPointer& mesh, int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); int *dBlock; cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - + int oddEvenBlock = 0; while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) @@ -269,19 +269,30 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), - dAux, - BlockIterDevice ); + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - + TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - aux1<<>>( auxPtr.template modifyData< Device>(), dAux, 0 ); + //aux1<<>>( auxPtr.template modifyData< Device>(), dAux, 0 ); cudaFree( dAux ); cudaFree( BlockIterDevice ); cudaFree( dBlock ); @@ -299,7 +310,7 @@ solve( const MeshPointer& mesh, } #ifdef HAVE_CUDA -template < typename Real, typename Device, typename Index > +/*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) { int i = threadIdx.x + blockDim.x*blockIdx.x; @@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; } -} +}*/ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) { @@ -366,8 +377,8 @@ 
__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice ) + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; @@ -417,109 +428,114 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } __syncthreads(); - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) - { - if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); - } - - if( thri == 2 ) - { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( thri == 3 ) + if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) { - if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - } - __syncthreads(); + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) + 
sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + } + + if( thri == 1 ) + { + if( blIdx != 0 && thrj+1 < ykolik ) + sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + } + + if( thri == 2 ) + { + if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + } + + if( thri == 3 ) + { + if( blIdy != 0 && thrj+1 < xkolik ) + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + - while( changed[ 0 ] ) - { - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + } + __syncthreads(); + + while( changed[ 0 ] ) { - if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) + { + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + } } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y == 1024 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + if( currentIndex < 512 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 512 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + if( currentIndex < 256 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 256 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + if( currentIndex < 128 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 128 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + if( currentIndex < 64 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + } } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ 
currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + if( changed[ 0 ] && thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; + __syncthreads(); } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - __syncthreads(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) + aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; + } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; } #endif -- GitLab From da336fb8bd927bc927bde8bde5876b18f07a23cf Mon Sep 17 00:00:00 2001 From: Fencl Date: Sun, 7 Oct 2018 12:55:16 +0200 Subject: [PATCH 02/20] FIM method implemented. Neighbours are being found on CPU. 3D parallel method disabled because of Array changes. 
--- .../tnlDirectEikonalMethodsBase.h | 9 +- .../tnlFastSweepingMethod2D_impl.h | 199 +++++++++++------- .../tnlFastSweepingMethod3D_impl.h | 4 +- 3 files changed, 134 insertions(+), 78 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index eb7cbd2a5..c92368deb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -113,6 +113,8 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); +template < typename Index > +void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -130,8 +132,11 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock); -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 7e4028fbe..817811c84 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -235,13 +235,6 @@ solve( const MeshPointer& mesh, { // TODO: CUDA code #ifdef HAVE_CUDA - - Real *dAux; - cudaMalloc(&dAux, ( mesh->getDimensions().x() * mesh->getDimensions().y() ) * sizeof( Real ) ); - - - - const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); @@ -250,18 +243,30 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); + TNL::Containers::Array< int, Devices::Host, IndexType > BlockIter; + BlockIter.setSize( numBlocksX * numBlocksY ); + BlockIter.setValue( 0 ); + /*int* BlockIter = (int*)malloc( ( numBlocksX * numBlocksY ) * sizeof( int ) ); + for( int i = 0; i < numBlocksX*numBlocksY +1; i++) + BlockIter[i] = 1;*/ - //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); - - int *BlockIterDevice; int BlockIterD = 1; + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY ); + BlockIterDevice.setValue( 1 ); + /*int *BlockIterDevice; cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) ); + cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice);*/ + int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); - int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - int oddEvenBlock = 0; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ + while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) @@ -270,89 +275,132 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + BlockIter = BlockIterDevice; + //cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyDeviceToHost); + GetNeighbours( BlockIter, numBlocksX, numBlocksY ); + + BlockIterDevice = BlockIter; + //cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + + CudaParallelReduc<<< nBlocks, 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - //aux1<<>>( auxPtr.template modifyData< 
Device>(), dAux, 0 ); - cudaFree( dAux ); - cudaFree( BlockIterDevice ); + /*cudaFree( BlockIterDevice ); cudaFree( dBlock ); + delete BlockIter;*/ cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - //aux = *auxPtr; - //interfaceMap = *interfaceMapPtr; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; #endif } iteration++; } aux.save("aux-final.tnl"); } - -#ifdef HAVE_CUDA -/*template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) +template < typename Index > +void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >(); - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 1 ) - { - dAux[ j*mesh.getDimensions().x() + i ] = aux[ j*mesh.getDimensions().x() + i ]; - } - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 0 ) - { - aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; - } - -}*/ + TNL::Containers::Array< int, Devices::Host, Index > BlockIterPom; + BlockIterPom.setSize( numBlockX * numBlockY ); + BlockIterPom.setValue( 0 ); + /*int* BlockIterPom; + BlockIterPom = new int[numBlockX * numBlockY];*/ + /*for(int i = 0; i < numBlockX * numBlockY; i++) + BlockIterPom[ i ] = 0;*/ + for(int i = 0; i < numBlockX * numBlockY; i++) + { + + if( BlockIter[ i ] ) + { + // i = k*numBlockY + m; + int m=0, k=0; + m = i%numBlockY; + k = i/numBlockY; + if( k > 0 && numBlockY > 1 ) + BlockIterPom[i - numBlockX] = 1; + if( k < numBlockY-1 && numBlockY > 1 ) + BlockIterPom[i + numBlockX] = 1; + + if( m >= 0 && m < numBlockX - 1 && numBlockX > 1 ) + BlockIterPom[ i+1 ] = 1; + if( m <= numBlockX -1 && m > 0 && numBlockX > 1 ) + BlockIterPom[ i-1 ] = 1; 
+ } + } + for(int i = 0; i < numBlockX * numBlockY; i++ ){ +/// if( !BlockIter[ i ] ) + BlockIter[ i ] = BlockIterPom[ i ]; +/// else +/// BlockIter[ i ] = 0; + } + /*for( int i = numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIter[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + //delete[] BlockIterPom; +} -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) +#ifdef HAVE_CUDA +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { int i = threadIdx.x; int blId = blockIdx.x; + /*if ( i == 0 && blId == 0 ){ + printf( "nBlocks = %d \n", nBlocks ); + for( int j = nBlocks-1; j > -1 ; j--){ + printf( "cislo = %d \n", BlockIterDevice[ j ] ); + } + }*/ __shared__ volatile int sArray[ 512 ]; - sArray[ i ] = false; - if(blId * 1024 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 1024 + i ]; + sArray[ i ] = 0; + if( blId * 512 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + __syncthreads(); - if (blockDim.x * blockDim.y == 1024) { + if (blockDim.x == 1024) { if (i < 512) - sArray[ i ] += sArray[ i ]; + sArray[ i ] += sArray[ i + 512 ]; } __syncthreads(); - if (blockDim.x * blockDim.y >= 512) { + if (blockDim.x >= 512) { if (i < 256) { - sArray[ i ] += sArray[ i ]; + sArray[ i ] += sArray[ i + 256 ]; } } - if (blockDim.x * blockDim.y >= 256) { + __syncthreads(); + if (blockDim.x >= 256) { if (i < 128) { sArray[ i ] += sArray[ i + 128 ]; } } __syncthreads(); - if (blockDim.x * blockDim.y >= 128) { + if (blockDim.x >= 128) { if (i < 64) { sArray[ i ] += sArray[ i + 64 ]; } @@ -360,12 +408,12 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock __syncthreads(); if (i < 32 ) { - if( blockDim.x * blockDim.y >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x * 
blockDim.y >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x * blockDim.y >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x * blockDim.y >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x * blockDim.y >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x * blockDim.y >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; } if( i == 0 ) @@ -378,14 +426,15 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; int currentIndex = thrj * blockDim.x + thri; - + if( BlockIterDevice[ blIdy * gridDim.x + blIdx] ) + { //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; __shared__ volatile bool changed[16*16]; changed[ currentIndex ] = false; @@ -424,13 +473,13 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*blockDim.y+1; - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; + //BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; } __syncthreads(); - if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) - { - + if(thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 
0; + if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) @@ -528,14 +577,16 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) + if( changed[ 0 ] && thri == 0 && thrj == 0 ){ BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; + } __syncthreads(); } - + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; - } + /*if( thri == 0 && thrj == 0 ) + printf( "Block ID = %d, value = %d \n", (blIdy * numOfBlockx + blIdx), BlockIterDevice[ blIdy * numOfBlockx + blIdx ] );*/ } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index b024979cc..8c85745cd 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -272,8 +272,8 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + //CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); + //CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); -- GitLab From 
444e01c4c17449ccfcf432caf9bcf32ff6e80dfc Mon Sep 17 00:00:00 2001 From: Fencl Date: Mon, 22 Oct 2018 21:13:54 +0200 Subject: [PATCH 03/20] FIM method is now faster than chess method but some random error occurs. --- .../tnlDirectEikonalMethodsBase.h | 24 +- .../tnlDirectEikonalMethodsBase_impl.h | 193 ++++ .../hamilton-jacobi/tnlFastSweepingMethod.h | 3 +- .../tnlFastSweepingMethod2D_impl.h | 846 +++++++++--------- .../tnlFastSweepingMethod3D_impl.h | 31 +- 5 files changed, 657 insertions(+), 440 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index eb7cbd2a5..08ed947ed 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -61,8 +61,9 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > typedef Index IndexType; typedef Functions::MeshFunction< MeshType > MeshFunctionType; typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; void initInterface( const MeshFunctionPointer& input, MeshFunctionPointer& output, @@ -76,6 +77,11 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); + void updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + ArrayContainer BlockIterHost, int numThreadsPerBlock ); + + void getNeighbours( ArrayContainer BlockIterHost, 
int numBlockX, int numBlockY ); }; template< typename Real, @@ -113,6 +119,8 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); +template < typename Index > +void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -130,11 +138,15 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock); -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); + +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); -/*template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ +template < typename Index > +__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, @@ -150,7 +162,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, 
Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - int *BlockIterDevice ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index cfea6aca0..1f9fc5eeb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -89,6 +89,199 @@ initInterface( const MeshFunctionPointer& _input, } } +template< typename Real, + typename Device, + typename Index > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + ArrayContainer BlockIterHost, int numThreadsPerBlock ) +{ + for( int i = 0; i < BlockIterHost.getSize(); i++ ) + { + if( BlockIterHost[ i ] ) + { + MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 
1:0); + int xkolik = numThreadsPerBlock + 1; + int ykolik = numThreadsPerBlock + 1; + + int blIdx = i%numOfBlockx; + int blIdy = i/numOfBlocky; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*numThreadsPerBlock+1; + + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*numThreadsPerBlock+1; + + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; + changed[ 0 ] = 1;*/ + Real hx = mesh.getSpaceSteps().x(); + Real hy = mesh.getSpaceSteps().y(); + + Real changed1[ 16*16 ]; + /*Real changed2[ 16*16 ]; + Real changed3[ 16*16 ]; + Real changed4[ 16*16 ];*/ + Real sArray[18][18]; + + for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) + for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) + sArray[thrj][thri] = std::numeric_limits< Real >::max(); + + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; + + for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) + { + if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) + sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + + + if( blIdx != 0 && thrj+1 < ykolik ) + sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) + sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + + if( blIdy != 0 && thrj+1 < xkolik ) + sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = 0; l < numThreadsPerBlock; l++ ) + sArray[k+1][l+1] = aux[ blIdy * 
numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = 0; l < numThreadsPerBlock; l++ ){ + changed1[ k*numThreadsPerBlock + l ] = 0; + /*changed2[ k*numThreadsPerBlock + l ] = 0; + changed3[ k*numThreadsPerBlock + l ] = 0; + changed4[ k*numThreadsPerBlock + l ] = 0;*/ + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; + /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; + changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; + changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ + } + + if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && + (!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) + aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; + //std::cout<< sArray[k+1][l+1]; + } + //std::cout< +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) +{ + int* BlockIterPom; + BlockIterPom = new int [numBlockX * numBlockY]; + + for(int i = 0; i < numBlockX * numBlockY; i++) + { + BlockIterPom[ i ] = 0; + if( BlockIterHost[ i ] ) + { + // i = k*numBlockY + m; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( k > 0 ) + BlockIterPom[i - numBlockX] = 1; + if( k < numBlockY - 1 ) + BlockIterPom[i + numBlockX] = 1; + + if( m < numBlockX - 1 ) + BlockIterPom[ i+1 ] = 1; + if( m > 0 ) + BlockIterPom[ i-1 ] = 1; + } + } + for(int i = 0; i < numBlockX * numBlockY; i++ ) + //if( !BlockIter[ i ] ) + BlockIterHost[ i ] = BlockIterPom[ i ]; + /*else + BlockIter[ i ] = 0;*/ + /*for( int i = 
numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIterHost[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + delete[] BlockIterPom; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index fa8077427..60c690e06 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -88,7 +88,8 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > using typename BaseType::InterfaceMapType; using typename BaseType::MeshFunctionType; using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; FastSweepingMethod(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 7e4028fbe..e23148db5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -21,355 +21,348 @@ #include template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > const Index& FastSweepingMethod< Meshes::Grid< 2, Real, Device, 
Index >, Anisotropy >:: getMaxIterations() const { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: setMaxIterations( const IndexType& maxIterations ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ) -{ - /*MeshFunctionType v; - v.setMesh(mesh); - double A[320][320]; - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - A[i][j] = 0; - - std::ifstream file("/home/maty/Downloads/mapa2.txt"); - - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - file >> A[i][j]; - file.close(); - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - v[i*320 + j] = A[i][j]; - v.save("mapa.tnl");*/ - - - MeshFunctionPointer auxPtr; - InterfaceMapPointer interfaceMapPtr; - auxPtr->setMesh( mesh ); - interfaceMapPtr->setMesh( mesh ); - std::cout << "Initiating the interface cells ..." << std::endl; - BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ) +{ + MeshFunctionPointer auxPtr; + InterfaceMapPointer interfaceMapPtr; + auxPtr->setMesh( mesh ); + interfaceMapPtr->setMesh( mesh ); + std::cout << "Initiating the interface cells ..." 
<< std::endl; + BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + + auxPtr->save( "aux-ini.tnl" ); + + typename MeshType::Cell cell( *mesh ); + + IndexType iteration( 0 ); + InterfaceMapType interfaceMap = *interfaceMapPtr; + MeshFunctionType aux = *auxPtr; + + + + + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) + { + int numThreadsPerBlock = 16; + + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + + + ArrayContainer BlockIterHost; + BlockIterHost.setSize( numBlocksX * numBlocksY ); + BlockIterHost.setValue( 1 ); + /*for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<updateBlocks( interfaceMap, aux, BlockIterHost, numThreadsPerBlock); - auxPtr->save( "aux-ini.tnl" ); - - typename MeshType::Cell cell( *mesh ); - - IndexType iteration( 0 ); - InterfaceMapType interfaceMap = *interfaceMapPtr; - MeshFunctionType aux = *auxPtr; - while( iteration < this->maxIterations ) - { - if( std::is_same< DeviceType, Devices::Host >::value ) - { - for( cell.getCoordinates().y() = 0; + this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY ); + + //Reduction + for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + //std::cout<< BlockIterHost[ l*numBlocksX + k ]; + BlockIterHost[ 0 ] = BlockIterHost[ 0 ] || BlockIterHost[ l*numBlocksX + k ]; + } + //std::cout<getDimensions().y(); cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().y() = 0; + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh->getDimensions().y(); cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-2.tnl" ); - - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-2.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; cell.getCoordinates().y() >= 0 ; cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-3.tnl" ); - - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; cell.getCoordinates().y() >= 0; cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-4.tnl" ); - - /*for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().y(); - cell.getCoordinates().x()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().x(); - cell.getCoordinates().y()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - - aux.save( "aux-5.tnl" ); - - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().y(); - cell.getCoordinates().x()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().x() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - aux.save( "aux-6.tnl" ); - - for( cell.getCoordinates().x() = mesh->getDimensions().y() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().x(); - cell.getCoordinates().y()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().x() = mesh->getDimensions().y() - 1; - cell.getCoordinates().x() >= 0; - cell.getCoordinates().x()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().x() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - }*/ - } - if( std::is_same< DeviceType, Devices::Cuda >::value ) { - // TODO: CUDA code + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + }*/ + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + // TODO: CUDA code #ifdef HAVE_CUDA - - Real *dAux; - cudaMalloc(&dAux, ( mesh->getDimensions().x() * mesh->getDimensions().y() ) * sizeof( Real ) ); - - - - - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY ); - - tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - - //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); - - //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); - - int *BlockIterDevice; - int BlockIterD = 1; - - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) ); - int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); - int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - int oddEvenBlock = 0; - while( BlockIterD ) - { - /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ i ] = false;*/ - - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - - /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - - } - //aux1<<>>( auxPtr.template modifyData< Device>(), dAux, 0 ); - cudaFree( dAux ); - cudaFree( BlockIterDevice ); - cudaFree( dBlock ); - cudaDeviceSynchronize(); - - TNL_CHECK_CUDA_DEVICE; - - //aux = *auxPtr; - //interfaceMap = *interfaceMapPtr; -#endif + TNL_CHECK_CUDA_DEVICE; + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY ); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; + + int BlockIterD = 1; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY ); + BlockIterDevice.setValue( 1 ); + 
TNL_CHECK_CUDA_DEVICE; + int ne = 0; + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, ne); + TNL_CHECK_CUDA_DEVICE; + + /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 );*/ + /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ + /*int *BlockIterDevice; + cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ + int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); + //std::cout << "nBlocksNeigh = " << nBlocksNeigh << std::endl; + //free( BlockIter ); + /*int *BlockIterPom; + cudaMalloc((void**) &BlockIterPom, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ + + int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + TNL_CHECK_CUDA_DEVICE; + /*int *dBlock; + cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ + //int pocIter = 0; + while( BlockIterD ) + { + /*BlockIterPom1 = BlockIterDevice; + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << BlockIterPom1[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, 1); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + /*int poc = 0; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + if( BlockIterPom1[ i ] ) + poc = poc+1; + std::cout << "pocet bloku, ktere se pocitali = " << poc << std::endl;*/ + + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, /*BlockIterPom,*/ numBlocksX, numBlocksY ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + TNL_CHECK_CUDA_DEVICE; + + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ + //pocIter ++; } - iteration++; - } - aux.save("aux-final.tnl"); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout<< pocIter << std::endl; + + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif + } + iteration++; + } + aux.save("aux-final.tnl"); } #ifdef HAVE_CUDA -/*template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, 
Real *dAux, int a ) +template < typename Index > +__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >(); - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 1 ) - { - dAux[ j*mesh.getDimensions().x() + i ] = aux[ j*mesh.getDimensions().x() + i ]; - } - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 0 ) - { - aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; - } + int i = blockIdx.x * 1024 + threadIdx.x; + + if( i < numBlockX * numBlockY ) + { + int pom = 0;//BlockIterPom[ i ] = 0; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( m > 0 ) + if( BlockIterDevice[ i - 1 ] ) + pom = 1;//BlockIterPom[ i ] = 1; + if( m < numBlockX -1 && pom == 0 ) + if( BlockIterDevice[ i + 1 ] ) + pom = 1;//BlockIterPom[ i ] = 1; + if( k > 0 && pom == 0 ) + if( BlockIterDevice[ i - numBlockX ] ) + pom = 1;// BlockIterPom[ i ] = 1; + if( k < numBlockY -1 && pom == 0 ) + if( BlockIterDevice[ i + numBlockX ] ) + pom = 1;//BlockIterPom[ i ] = 1; -}*/ + + + BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; + } +} -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { - int i = threadIdx.x; - int blId = blockIdx.x; - __shared__ volatile int sArray[ 512 ]; - sArray[ i ] = false; - if(blId * 1024 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 1024 + i ]; - - if (blockDim.x * blockDim.y == 1024) { - if (i < 512) - sArray[ i ] += 
sArray[ i ]; + int i = threadIdx.x; + int blId = blockIdx.x; + __shared__ volatile int sArray[ 512 ]; + sArray[ i ] = 0; + if(blId * 512 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + __syncthreads(); + if (blockDim.x == 1024) { + if (i < 512) + sArray[ i ] += sArray[ i + 512 ]; + } + __syncthreads(); + if (blockDim.x >= 512) { + if (i < 256) { + sArray[ i ] += sArray[ i + 256 ]; } - __syncthreads(); - if (blockDim.x * blockDim.y >= 512) { - if (i < 256) { - sArray[ i ] += sArray[ i ]; - } + } + if (blockDim.x >= 256) { + if (i < 128) { + sArray[ i ] += sArray[ i + 128 ]; } - if (blockDim.x * blockDim.y >= 256) { - if (i < 128) { - sArray[ i ] += sArray[ i + 128 ]; - } + } + __syncthreads(); + if (blockDim.x >= 128) { + if (i < 64) { + sArray[ i ] += sArray[ i + 64 ]; } - __syncthreads(); - if (blockDim.x * blockDim.y >= 128) { - if (i < 64) { - sArray[ i ] += sArray[ i + 64 ]; - } - } - __syncthreads(); - if (i < 32 ) - { - if( blockDim.x * blockDim.y >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x * blockDim.y >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x * blockDim.y >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x * blockDim.y >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x * blockDim.y >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x * blockDim.y >= 2 ) sArray[ i ] += sArray[ i + 1 ]; - } - - if( i == 0 ) - dBlock[ blId ] = sArray[ 0 ]; + } + __syncthreads(); + if (i < 32 ) + { + if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + } + + if( i == 0 ) + dBlock[ blId ] = sArray[ 0 ]; } @@ -378,10 +371,40 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( 
tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne ) { - int thri = threadIdx.x; int thrj = threadIdx.y; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; + int thri = threadIdx.x; int thrj = threadIdx.y; + int blIdx = blockIdx.x; int blIdy = blockIdx.y; + int grIdx = gridDim.x; + + if( BlockIterDevice[ blIdy * grIdx + blIdx] ) + { + + const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + __shared__ volatile int numOfBlockx; + __shared__ volatile int numOfBlocky; + __shared__ int xkolik; + __shared__ int ykolik; + __shared__ volatile int NE; + if( thri == 0 && thrj == 0 ) + { + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); + numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*blockDim.x+1; + + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*blockDim.y+1; + BlockIterDevice[ blIdy * grIdx + blIdx ] = 0; + NE = ne; + } + __syncthreads(); + int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; int currentIndex = thrj * blockDim.x + thri; @@ -389,17 +412,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; __shared__ volatile bool changed[16*16]; changed[ currentIndex ] = false; - if( thrj == 0 && thri == 0 ) - changed[ 0 ] = true; + changed[ 0 ] = true; - const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; __shared__ Real hy; if( thrj == 1 && thri == 1 ) { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); } //__shared__ volatile Real sArray[ blockDim.y+2 ][ blockDim.x+2 ]; @@ -407,135 +428,110 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< sArray[thrj][thri] = std::numeric_limits< Real >::max(); //filling sArray edges - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ int xkolik; - __shared__ int ykolik; - if( thri == 0 && thrj == 0 ) + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && NE == 1 ) + sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + } + + if( thri == 1 ) { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + if( blIdx != 0 && thrj+1 < ykolik && NE == 1 ) + sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + } - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; + if( thri == 2 ) + { + if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik && NE == 1 ) + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); } - __syncthreads(); - if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) + if( thri == 3 ) { + if( blIdy != 0 && thrj+1 < xkolik && NE == 1 ) + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + } + __syncthreads(); + + while( changed[ 0 ] ) + { + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) { - if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); } - - if( thri == 2 ) + } + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y == 1024 ) + { + if( currentIndex < 512 ) { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; } - - if( thri == 3 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 512 ) + { + if( currentIndex < 256 ) { - if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; } - - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 256 ) + { + if( currentIndex < 128 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; } - __syncthreads(); - - while( changed[ 0 ] ) + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 128 ) + { + if( currentIndex < 64 ) { - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) - { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); - } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; - } - } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - __syncthreads(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() 
+ i ] = sArray[ thrj + 1 ][ thri + 1 ]; - + } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + if( changed[ 0 ] && thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * grIdx + blIdx ] = 1; + __syncthreads(); } + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) + aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; + } } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index b024979cc..4daf9fc92 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -258,13 +258,21 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; - int *BlockIterDevice; + int BlockIterD = 1; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) ); + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterDevice.setValue( 1 ); + /*int 
*BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 1:0); - int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); + + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ while( BlockIterD ) { @@ -272,17 +280,24 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - cudaFree( BlockIterDevice ); - cudaFree( dBlock ); + //cudaFree( BlockIterDevice ); + //cudaFree( dBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; @@ -302,7 +317,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - int *BlockIterDevice ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; int blIdx = blockIdx.x; int blIdy = 
blockIdx.y; int blIdz = blockIdx.z; -- GitLab From 1607d6774ba61d957b87e37d820e21b697d96b4a Mon Sep 17 00:00:00 2001 From: Fencl Date: Tue, 30 Oct 2018 18:38:41 +0100 Subject: [PATCH 04/20] FIM method implemented for GPU and FIM-FSM implemented for CPU (parallel). --- .../tnlDirectEikonalMethodsBase.h | 22 +- .../tnlDirectEikonalMethodsBase_impl.h | 2045 +++++++++-------- .../tnlFastSweepingMethod2D_impl.h | 863 +++---- .../tnlFastSweepingMethod3D_impl.h | 5 - 4 files changed, 1411 insertions(+), 1524 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 0f45be71c..cbb1a1ff6 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -74,12 +74,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const MeshEntity& cell, const RealType velocity = 1.0 ); - __cuda_callable__ bool updateCell( volatile Real sArray[18][18], + template< int sizeSArray > + __cuda_callable__ bool updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); + + template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, - ArrayContainer BlockIterHost, int numThreadsPerBlock ); + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; @@ -119,9 +123,6 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); -template < typename Index > -void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY 
); - #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, @@ -134,15 +135,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, -<<<<<<< HEAD - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); -======= - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 1f9fc5eeb..95971c9b8 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -1,4 +1,4 @@ - /* +/* * File: tnlDirectEikonalMethodsBase_impl.h * Author: oberhuber * @@ -13,233 +13,259 @@ #include 
"tnlFastSweepingMethod.h" template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& _interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - if( std::is_same< Device, Devices::Cuda >::value ) - { + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize ); - dim3 gridSize( numBlocksX ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize ); + dim3 gridSize( numBlocksX ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - const MeshType& mesh = _input->getMesh(); - typedef typename MeshType::Cell Cell; - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - Cell cell( mesh ); - for( cell.getCoordinates().x() = 0; + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshType& 
mesh = _input->getMesh(); + typedef typename MeshType::Cell Cell; + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + Cell cell( mesh ); + for( cell.getCoordinates().x() = 0; cell.getCoordinates().x() < mesh.getDimensions().x(); cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : - -std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - - const RealType& h = mesh.getSpaceSteps().x(); - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x() - 1; - cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : + -std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + + const RealType& h = mesh.getSpaceSteps().x(); + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x() - 1; + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + const auto& neighbors = cell.getNeighborEntities(); + Real pom = 0; + //const IndexType& c = cell.getIndex(); + const IndexType e = neighbors.template getEntityIndex< 1 >(); + if( c * input[ e ] <= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - const auto& neighbors = cell.getNeighborEntities(); - Real pom = 0; - //const IndexType& c = cell.getIndex(); - const IndexType e = neighbors.template getEntityIndex< 1 >(); - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ e ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - - pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; - - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - } + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + + pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) + output[ e ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; } + } } + } } template< typename Real, - typename Device, - typename Index > -void + typename Device, + typename Index > +template< int sizeSArray > +void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, - MeshFunctionType aux, - ArrayContainer BlockIterHost, int numThreadsPerBlock ) + MeshFunctionType aux, + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { +#pragma omp parallel for schedule( dynamic ) for( int i = 0; i < BlockIterHost.getSize(); i++ ) { if( BlockIterHost[ i ] ) { MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); - + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); - int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 
1:0); + //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; + int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); + //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; int xkolik = numThreadsPerBlock + 1; int ykolik = numThreadsPerBlock + 1; int blIdx = i%numOfBlockx; - int blIdy = i/numOfBlocky; + int blIdy = i/numOfBlockx; + //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*numThreadsPerBlock+1; if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*numThreadsPerBlock+1; - - + //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; - changed[ 0 ] = 1;*/ + changed[ 0 ] = 1;*/ Real hx = mesh.getSpaceSteps().x(); Real hy = mesh.getSpaceSteps().y(); - Real changed1[ 16*16 ]; - /*Real changed2[ 16*16 ]; - Real changed3[ 16*16 ]; - Real changed4[ 16*16 ];*/ - Real sArray[18][18]; + bool changed = false; + + + RealType *sArray; + sArray = new Real[ sizeSArray * sizeSArray ]; + if( sArray == nullptr ) + std::cout << "Error while allocating memory for sArray." 
<< std::endl; + + for( int thri = 0; thri < sizeSArray; thri++ ){ + for( int thrj = 0; thrj < sizeSArray; thrj++ ) + sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); + } - for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) - for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) - sArray[thrj][thri] = std::numeric_limits< Real >::max(); - BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; - + for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - - + sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + + if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); - + sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); - + sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; 
} - - for( int k = 0; k < numThreadsPerBlock; k++ ) - for( int l = 0; l < numThreadsPerBlock; l++ ) - sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; - - for( int k = 0; k < numThreadsPerBlock; k++ ) + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) + sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + } + bool pom = false; + for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ - changed1[ k*numThreadsPerBlock + l ] = 0; - /*changed2[ k*numThreadsPerBlock + l ] = 0; - changed3[ k*numThreadsPerBlock + l ] = 0; - changed4[ k*numThreadsPerBlock + l ] = 0;*/ - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) - { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ + //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); + pom = this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + changed = changed || pom; } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) - for( int l = 0; l < numThreadsPerBlock; l++ ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + } + /*aux.save( "aux-1pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = 0; k < numThreadsPerBlock; k++ ) - for( int l = numThreadsPerBlock-1; l >-1; l-- ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + /*aux.save( "aux-2pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) + /*aux.save( "aux-3pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) - for( int l = numThreadsPerBlock-1; l >-1; l-- ){ - changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; - /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; - changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; - changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ + } + /*aux.save( "aux-4pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; } + std::cout << std::endl; + }*/ + - if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) + if( changed ){ BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; - + } + + for( int k = 0; k < numThreadsPerBlock; k++ ){ - for( int l = 0; l < numThreadsPerBlock; l++ ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && - 
(!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) - aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) + helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout< + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) @@ -249,643 +275,643 @@ getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) for(int i = 0; i < numBlockX * numBlockY; i++) { - BlockIterPom[ i ] = 0; - if( BlockIterHost[ i ] ) - { - // i = k*numBlockY + m; - int m=0, k=0; - m = i%numBlockX; - k = i/numBlockX; - if( k > 0 ) - BlockIterPom[i - numBlockX] = 1; - if( k < numBlockY - 1 ) - BlockIterPom[i + numBlockX] = 1; - - if( m < numBlockX - 1 ) - BlockIterPom[ i+1 ] = 1; - if( m > 0 ) - BlockIterPom[ i-1 ] = 1; + BlockIterPom[ i ] = 0;//BlockIterPom[ i ] = 0; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( m > 0 && BlockIterHost[ i - 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ + BlockIterPom[ i ] = 1; } + //BlockIterPom[ i ]; } - for(int i = 0; i < numBlockX * numBlockY; i++ ) - //if( !BlockIter[ i ] ) - BlockIterHost[ i ] = BlockIterPom[ i ]; - /*else - BlockIter[ i ] = 0;*/ - /*for( int i = numBlockX-1; i > -1; i-- ) + + for(int i = 0; i < numBlockX * numBlockY; i++) { - for( int j = 0; j< numBlockY; j++ ) - std::cout << BlockIterHost[ i*numBlockY + j ]; 
- std::cout << std::endl; + if( !BlockIterHost[ i ] ) + BlockIterHost[ i ] = BlockIterPom[ i ]; } - std::cout << std::endl;*/ + /*else + BlockIter[ i ] = 0;*/ + /*for( int i = numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIterHost[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ delete[] BlockIterPom; } template< typename Real, - typename Device, - typename Index > - template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v ) + const MeshEntity& cell, + const RealType v ) { - const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); - const MeshType& mesh = cell.getMesh(); - const RealType& h = mesh.getSpaceSteps().x(); - const RealType value = u( cell ); - RealType a, tmp = std::numeric_limits< RealType >::max(); - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1 >() ]; - else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1 >() ]; - else - { - a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], - u[ neighborEntities.template getEntityIndex< 1 >() ] ); - } - - if( fabs( a ) == std::numeric_limits< RealType >::max() ) - return; - - tmp = a + TNL::sign( value ) * h/v; - - u[ cell.getIndex() ] = argAbsMin( value, tmp ); + const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); + const MeshType& mesh = cell.getMesh(); + const RealType& h = mesh.getSpaceSteps().x(); + const RealType value = u( cell ); + RealType a, tmp = std::numeric_limits< RealType >::max(); + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ 
neighborEntities.template getEntityIndex< -1 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], + u[ neighborEntities.template getEntityIndex< 1 >() ] ); + } + + if( fabs( a ) == std::numeric_limits< RealType >::max() ) + return; + + tmp = a + TNL::sign( value ) * h/v; + + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& _interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - - if( std::is_same< Device, Devices::Cuda >::value ) - { + + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), + 
_output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - MeshFunctionType input = _input.getData(); - - /*double A[320][320]; - std::ifstream fileInit("/home/maty/Downloads/initData.txt"); - - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - fileInit >> A[i][j]; - fileInit.close(); - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - input[i*320 + j] = A[i][j];*/ - - - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - const MeshType& mesh = input.getMesh(); - typedef typename MeshType::Cell Cell; - Cell cell( mesh ); - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) >= 0 ? 
std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - for( cell.getCoordinates().y() = 0; + } + if( std::is_same< Device, Devices::Host >::value ) + { + MeshFunctionType input = _input.getData(); + + /*double A[320][320]; + std::ifstream fileInit("/home/maty/Downloads/initData.txt"); + + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + fileInit >> A[j]; + fileInit.close(); + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + input[i*320 + j] = A[j];*/ + + + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + const MeshType& mesh = input.getMesh(); + typedef typename MeshType::Cell Cell; + Cell cell( mesh ); + for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : + - std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! 
cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); + const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); + //Try init with exact data: + /*if( c * input[ n ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ n ] = input[ n ]; + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; + } + if( c * input[ e ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ e ] = input[ e ]; + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + }*/ + if( c * input[ n ] <= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! cell.isBoundaryEntity() ) + /*if( c >= 0 ) + {*/ + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + pom = pom - TNL::sign( c )*hy; + if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) + output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; + /*}else { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); - const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); - //Try init with exact data: - /*if( c * input[ n ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ n ] = input[ n ]; - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ e ] = input[ e ]; - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - }*/ - if( c * input[ n ] <= 0 ) - { - /*if( c >= 0 ) - {*/ - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - pom = pom - TNL::sign( c )*hy; - if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) - output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; - /*}else - { - pom = - ( hy * c )/( c - 
input[ n ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - if( output[ n ] > hy + pom ) - output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); - }*/ - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - /*if( c >= 0 ) - {*/ - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - - pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; - /*}else - { - pom = - (hx * c)/( c - input[ e ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ e ] > pom ) - output[ e ] = pom; - }*/ - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - } + pom = - ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + if( output[ n ] > hy + pom ) + output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); + }*/ + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; } + if( c * input[ e ] <= 0 ) + { + /*if( c >= 0 ) + {*/ + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + + pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) + output[ e ] = pom; + /*}else + { + pom = - (hx * c)/( c - input[ e ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ e ] > pom ) + output[ e ] = pom; + }*/ + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + } } + } } template< typename Real, - typename Device, - typename Index > - 
template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v) + const MeshEntity& cell, + const RealType v) { - const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); - const MeshType& mesh = cell.getMesh(); - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType value = u( cell ); - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; - else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1, 0 >() ]; - else - { - a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0 >() ], - u[ neighborEntities.template getEntityIndex< 1, 0 >() ] ); - } - - if( cell.getCoordinates().y() == 0 ) - b = u[ neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) - b = u[ neighborEntities.template getEntityIndex< 0, -1 >() ]; - else - { - b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1 >() ], - u[ neighborEntities.template getEntityIndex< 0, 1 >() ] ); - } - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return; - /*if( fabs( a ) == TypeInfo< Real >::getMaxValue() || - fabs( b ) == TypeInfo< Real >::getMaxValue() || - fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) + const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); + const MeshType& mesh = cell.getMesh(); + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType value = u( cell ); + RealType a, b, tmp = 
std::numeric_limits< RealType >::max(); + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ neighborEntities.template getEntityIndex< -1, 0 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0 >() ], + u[ neighborEntities.template getEntityIndex< 1, 0 >() ] ); + } + + if( cell.getCoordinates().y() == 0 ) + b = u[ neighborEntities.template getEntityIndex< 0, 1 >()]; + else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) + b = u[ neighborEntities.template getEntityIndex< 0, -1 >() ]; + else + { + b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1 >() ], + u[ neighborEntities.template getEntityIndex< 0, 1 >() ] ); + } + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return; + /*if( fabs( a ) == TypeInfo< Real >::getMaxValue() || + fabs( b ) == TypeInfo< Real >::getMaxValue() || + fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - tmp = - fabs( a ) >= fabs( b ) ? b + TNL::sign( value ) * hy : - a + TNL::sign( value ) * hx; + tmp = + fabs( a ) >= fabs( b ) ? 
b + TNL::sign( value ) * hy : + a + TNL::sign( value ) * hx; }*/ - /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( a - b ) < TNL::sqrt( (hx * hx + hy * hy)/v ) ) + /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( a - b ) < TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - tmp = ( hx * hx * b + hy * hy * a + - sign( value ) * hx * hy * TNL::sqrt( ( hx * hx + hy * hy )/v - - ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); - u[ cell.getIndex() ] = tmp; + tmp = ( hx * hx * b + hy * hy * a + + sign( value ) * hx * hy * TNL::sqrt( ( hx * hx + hy * hy )/v - + ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); + u[ cell.getIndex() ] = tmp; } else { - tmp = - fabs( a ) > fabs( b ) ? b + TNL::sign( value ) * hy/v : - a + TNL::sign( value ) * hx/v; - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - //tmp = TypeInfo< RealType >::getMaxValue(); + tmp = + fabs( a ) > fabs( b ) ? b + TNL::sign( value ) * hy/v : + a + TNL::sign( value ) * hx/v; + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + //tmp = TypeInfo< RealType >::getMaxValue(); }*/ - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - 
pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + } } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& _interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - if( std::is_same< Device, Devices::Cuda >::value ) - { + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); - if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) - std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; - dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - const MeshType& mesh = input.getMesh(); - typedef typename MeshType::Cell Cell; - Cell cell( mesh ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) > 0 ? 
std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType& hz = mesh.getSpaceSteps().z(); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + const MeshType& mesh = input.getMesh(); + typedef typename MeshType::Cell Cell; + Cell cell( mesh ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) > 0 ? 
std::numeric_limits< RealType >::max() : + - std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType& hz = mesh.getSpaceSteps().z(); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); + //Try exact initiation + /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); + const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); + if( c * input[ e ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ e ] = input[ e ]; + interfaceMap[ e ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ n ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ n ] = input[ n ]; + interfaceMap[ n ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ t ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ t ] = input[ t ]; + interfaceMap[ t ] = true; + interfaceMap[ cell.getIndex() ] = true; + }*/ + if( c * input[ n ] <= 0 ) + { + if( c >= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); - //Try exact initiation - /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); - if( c * input[ e ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ e ] = input[ e ]; - interfaceMap[ e ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ n ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ n ] = input[ n ]; - interfaceMap[ n ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ t ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ t ] = input[ t ]; - interfaceMap[ t ] = true; - interfaceMap[ cell.getIndex() ] = true; - }*/ - if( c * input[ n ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - if ( output[ n ] < pom - hy) - output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; - - }else - { - pom = - ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - if( output[ n ] > hy + pom ) - output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hx * c )/( c - input[ e ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ e ] < pom ) - output[ e ] = pom; - - }else - { - pom = - (hx * c)/( c - input[ e ]); - if( output[ cell.getIndex() ] < pom ) - 
output[ cell.getIndex() ] = pom; - - pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ e ] > pom ) - output[ e ] = pom; - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - if( c * input[ t ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hz * c )/( c - input[ t ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ t ] < pom ) - output[ t ] = pom; - - }else - { - pom = - (hz * c)/( c - input[ t ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ t ] > pom ) - output[ t ] = pom; - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ t ] = true; - } - } - /*output[ cell.getIndex() ] = - c > 0 ? TypeInfo< RealType >::getMaxValue() : - -TypeInfo< RealType >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 + pom = ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + if ( output[ n ] < pom - hy) + output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; + + }else + { + pom = - ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + if( output[ n ] > hy + pom ) + output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); + } - } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; + } + if( c * input[ e ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hx * c )/( c - input[ e ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ e ] < pom ) + output[ e ] = pom; + + }else + { + pom = - (hx * c)/( c - input[ e ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - 
input[ e ]); + if( output[ e ] > pom ) + output[ e ] = pom; + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + if( c * input[ t ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hz * c )/( c - input[ t ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ t ] < pom ) + output[ t ] = pom; + + }else + { + pom = - (hz * c)/( c - input[ t ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ t ] > pom ) + output[ t ] = pom; + + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ t ] = true; + } + } + /*output[ cell.getIndex() ] = + c > 0 ? TypeInfo< RealType >::getMaxValue() : + -TypeInfo< RealType >::getMaxValue(); + interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 + } + } } template< typename Real, - typename Device, - typename Index > - template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v ) + const MeshEntity& cell, + const RealType v ) { - const auto& neighborEntities = cell.template getNeighborEntities< 3 >(); - const MeshType& mesh = cell.getMesh(); + const auto& neighborEntities = cell.template getNeighborEntities< 3 >(); + const MeshType& mesh = cell.getMesh(); - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType& hz = mesh.getSpaceSteps().z(); - const RealType value = u( cell ); - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ]; - else if( cell.getCoordinates().x() == 
mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ]; - else + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType& hz = mesh.getSpaceSteps().z(); + const RealType value = u( cell ); + //std::cout << value << std::endl; + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ], + u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] ); + } + if( cell.getCoordinates().y() == 0 ) + b = u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ]; + else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) + b = u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ]; + else + { + b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ], + u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] ); + }if( cell.getCoordinates().z() == 0 ) + c = u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ]; + else if( cell.getCoordinates().z() == mesh.getDimensions().z() - 1 ) + c = u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ]; + else + { + c = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ], + u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] ); + } + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() && + fabs( c ) == std::numeric_limits< RealType >::max() ) + return; + + + /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - a = TNL::argAbsMin( u[ 
neighborEntities.template getEntityIndex< -1, 0, 0 >() ], - u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] ); + tmp = ( hx * hx * a + hy * hy * b + + sign( value ) * hx * hy * sqrt( ( hx * hx + hy * hy )/v - + ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); } - if( cell.getCoordinates().y() == 0 ) - b = u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ]; - else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) - b = u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ]; - else - { - b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ], - u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] ); - }if( cell.getCoordinates().z() == 0 ) - c = u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ]; - else if( cell.getCoordinates().z() == mesh.getDimensions().z() - 1 ) - c = u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ]; - else + if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( c ) != TypeInfo< Real >::getMaxValue() && + fabs( a - c ) >= TNL::sqrt( (hx * hx + hz * hz)/v ) ) { - c = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ], - u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] ); + tmp = ( hx * hx * a + hz * hz * c + + sign( value ) * hx * hz * sqrt( ( hx * hx + hz * hz )/v - + ( a - c ) * ( a - c ) ) )/( hx * hx + hz * hz ); } - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) - return; - - - /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) - { - tmp = ( hx * hx * a + hy * hy * b + - sign( value ) * hx * hy * sqrt( ( hx * hx + hy * hy )/v - - ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); - } - if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( c ) != TypeInfo< Real >::getMaxValue() && - 
fabs( a - c ) >= TNL::sqrt( (hx * hx + hz * hz)/v ) ) - { - tmp = ( hx * hx * a + hz * hz * c + - sign( value ) * hx * hz * sqrt( ( hx * hx + hz * hz )/v - - ( a - c ) * ( a - c ) ) )/( hx * hx + hz * hz ); - } - if( fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( c ) != TypeInfo< Real >::getMaxValue() && - fabs( b - c ) >= TNL::sqrt( (hy * hy + hz * hz)/v ) ) - { - tmp = ( hy * hy * b + hz * hz * c + - sign( value ) * hy * hz * sqrt( ( hy * hy + hz * hz )/v - - ( b - c ) * ( b - c ) ) )/( hy * hy + hz * hz ); - }*/ - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + if( fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( c ) != TypeInfo< Real >::getMaxValue() && + fabs( b - c ) >= TNL::sqrt( (hy * hy + hz * hz)/v ) ) + { + tmp = ( hy * hy * b + hz * hz * c + + sign( value ) * hy * hz * sqrt( ( hy * hy + hz * hz )/v - + ( b - c ) * ( b - c ) ) )/( hy * hy + hz * hz ); + }*/ + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) { - u[ cell.getIndex() ] = argAbsMin( value, tmp ); + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } else { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + 
pom[ 4 ] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } + } } template < typename T1, typename T2 > T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double v) { - T1 tmp; - if( fabs( a ) != std::numeric_limits< T1 >::max && - fabs( b ) != std::numeric_limits< T1 >::max && - fabs( a - b ) < ha/v )//TNL::sqrt( (ha * ha + hb * hb)/2 )/v ) - { - tmp = ( ha * ha * b + hb * hb * a + + T1 tmp; + if( fabs( a ) != std::numeric_limits< T1 >::max && + fabs( b ) != std::numeric_limits< T1 >::max && + fabs( a - b ) < ha/v )//TNL::sqrt( (ha * ha + hb * hb)/2 )/v ) + { + tmp = ( ha * ha * b + hb * hb * a + TNL::sign( value ) * ha * hb * TNL::sqrt( ( ha * ha + hb * hb )/( v * v ) - ( a - b ) * ( a - b ) ) )/( ha * ha + hb * hb ); - } - else - { - tmp = std::numeric_limits< T1 >::max; - } - - return tmp; + } + else + { + tmp = std::numeric_limits< T1 >::max; + } + + return tmp; } template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ) { - T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; - if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= 
fabs(pom[2])){ - tmp[0] = pom[0]; tmp[1] = pom[1]; tmp[2] = pom[2]; - tmp[3] = pom[3]; tmp[4] = pom[4]; tmp[5] = pom[5]; - - } - else if( fabs(pom[0]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[1]) ){ - tmp[0] = pom[0]; tmp[1] = pom[2]; tmp[2] = pom[1]; - tmp[3] = pom[3]; tmp[4] = pom[5]; tmp[5] = pom[4]; - } - else if( fabs(pom[1]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[2]) ){ - tmp[0] = pom[1]; tmp[1] = pom[0]; tmp[2] = pom[2]; - tmp[3] = pom[4]; tmp[4] = pom[3]; tmp[5] = pom[5]; - } - else if( fabs(pom[1]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[0]) ){ - tmp[0] = pom[1]; tmp[1] = pom[2]; tmp[2] = pom[0]; - tmp[3] = pom[4]; tmp[4] = pom[5]; tmp[5] = pom[3]; - } - else if( fabs(pom[2]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[1]) ){ - tmp[0] = pom[2]; tmp[1] = pom[0]; tmp[2] = pom[1]; - tmp[3] = pom[5]; tmp[4] = pom[3]; tmp[5] = pom[4]; - } - else if( fabs(pom[2]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[0]) ){ - tmp[0] = pom[2]; tmp[1] = pom[1]; tmp[2] = pom[0]; - tmp[3] = pom[5]; tmp[4] = pom[4]; tmp[5] = pom[3]; - } + T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; + if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){ + tmp[0] = pom[0]; tmp[1] = pom[1]; tmp[2] = pom[2]; + tmp[3] = pom[3]; tmp[4] = pom[4]; tmp[5] = pom[5]; - for( int i = 0; i < 6; i++ ) - { - pom[ i ] = tmp[ i ]; - } + } + else if( fabs(pom[0]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[1]) ){ + tmp[0] = pom[0]; tmp[1] = pom[2]; tmp[2] = pom[1]; + tmp[3] = pom[3]; tmp[4] = pom[5]; tmp[5] = pom[4]; + } + else if( fabs(pom[1]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[2]) ){ + tmp[0] = pom[1]; tmp[1] = pom[0]; tmp[2] = pom[2]; + tmp[3] = pom[4]; tmp[4] = pom[3]; tmp[5] = pom[5]; + } + else if( fabs(pom[1]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[0]) ){ + tmp[0] = pom[1]; tmp[1] = pom[2]; tmp[2] = pom[0]; + tmp[3] = pom[4]; tmp[4] = pom[5]; tmp[5] = pom[3]; + } + else if( fabs(pom[2]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[1]) ){ + tmp[0] = pom[2]; tmp[1] = pom[0]; 
tmp[2] = pom[1]; + tmp[3] = pom[5]; tmp[4] = pom[3]; tmp[5] = pom[4]; + } + else if( fabs(pom[2]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[0]) ){ + tmp[0] = pom[2]; tmp[1] = pom[1]; tmp[2] = pom[0]; + tmp[3] = pom[5]; tmp[4] = pom[4]; tmp[5] = pom[3]; + } + + for( int i = 0; i < 6; i++ ) + { + pom[ i ] = tmp[ i ]; + } } @@ -893,372 +919,373 @@ __cuda_callable__ void sortMinims( T1 pom[] ) #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() ) + { + typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; + cell.refresh(); + const Index cind = cell.getIndex(); + - if( i < mesh.getDimensions().x() ) + output[ cind ] = + input( cell ) >= 0 ? std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + + const Real& h = mesh.getSpaceSteps().x(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? 
std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - - const Real& h = mesh.getSpaceSteps().x(); - cell.refresh(); - const Real& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1 >(); - const Index w = neighbors.template getEntityIndex< -1 >(); - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1 >(); + const Index w = neighbors.template getEntityIndex< -1 >(); + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } - + } + } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = 
blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; + cell.refresh(); + const Index cind = cell.getIndex(); + - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + output[ cind ] = + input( cell ) >= 0 ? std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - cell.refresh(); - const Real& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1 >(); - const Index s = neighbors.template getEntityIndex< 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cell.getIndex() ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1 >(); + const Index s = neighbors.template getEntityIndex< 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ 
cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } + } } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - int k = blockDim.z*blockIdx.z + threadIdx.z; - const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + { + typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? 
std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + cell.refresh(); - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + const Real& hz = mesh.getSpaceSteps().z(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - cell.refresh(); - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - const Real& hz = mesh.getSpaceSteps().z(); - const Real& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const Index s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); - const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ b ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ t ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const Index s = neighbors.template 
getEntityIndex< 0, -1, 0 >(); + const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); + const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ b ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ t ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } + } } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > +template< int sizeSArray > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: -updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, - const Real v ) +updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, + const Real v ) { - const RealType value = sArray[ thrj ][ thri ]; - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], - sArray[ thrj-1 ][ thri ] ); - 
- a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], - sArray[ thrj ][ thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } + const RealType value = sArray[ thrj * sizeSArray + thri ]; + RealType a, b, tmp = std::numeric_limits< RealType >::max(); + + b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], + sArray[ (thrj-1) * sizeSArray + thri ] ); + + a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], + sArray[ thrj * sizeSArray + thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - 
return true; - else - return false; - } - - return false; + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + + return false; } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) { - const RealType value = sArray[ thri ]; - RealType a, tmp = std::numeric_limits< RealType >::max(); - - a = TNL::argAbsMin( sArray[ thri+1 ], - sArray[ thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() ) - return false; - - tmp = a + TNL::sign( value ) * h/v; - - - sArray[ thri ] = argAbsMin( value, tmp ); - - tmp = value - sArray[ thri ]; - if ( fabs( tmp ) > 0.001*h ) - return true; - else - return false; + const RealType value = sArray[ thri ]; + RealType a, tmp = std::numeric_limits< RealType >::max(); + + a = TNL::argAbsMin( sArray[ thri+1 ], + sArray[ thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() ) + return false; + + tmp = a + TNL::sign( value ) * h/v; + + + sArray[ thri ] = argAbsMin( value, tmp ); + + tmp = value - sArray[ thri ]; + if ( fabs( tmp ) > 0.001*h ) + return true; + else + return false; } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, const Real v ) { - const RealType value = sArray[thrk][thrj][thri]; - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], - sArray[ thrk-1 ][ thrj ][ thri ] ); - 
- b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], - sArray[ thrk ][ thrj-1 ][ thri ] ); - - a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], - sArray[ thrk ][ thrj ][ thri-1 ] ); - - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) + const RealType value = sArray[thrk][thrj][thri]; + //std::cout << value << std::endl; + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], + sArray[ thrk-1 ][ thrj ][ thri ] ); + + b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], + sArray[ thrk ][ thrj-1 ][ thri ] ); + + a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], + sArray[ thrk ][ thrj ][ thri-1 ] ); + + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() && + fabs( c ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + + sortMinims( pom ); + + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else return false; - - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - - sortMinims( pom ); - - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ 
thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; } else { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; } - - return false; + } + + return false; } #endif diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 4520eab0a..e29421bb1 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -15,6 +15,7 @@ #include "tnlFastSweepingMethod.h" #include +#include #include @@ -80,116 +81,171 @@ solve( const MeshPointer& mesh, - while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 16; + int numThreadsPerBlock = 1024; + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; + + /*Real **sArray = new Real*[numBlocksX*numBlocksY]; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + sArray[ i ] = new Real [ (numThreadsPerBlock + 2)*(numThreadsPerBlock + 2)];*/ - ArrayContainer BlockIterHost; BlockIterHost.setSize( numBlocksX * numBlocksY ); BlockIterHost.setValue( 1 ); + int IsCalculationDone = 1; + + MeshFunctionPointer helpFunc( mesh ); + MeshFunctionPointer helpFunc1( mesh ); + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + //std::cout<< "Size = " << BlockIterHost.getSize() << std::endl; /*for( int k = numBlocksX-1; k >-1; k-- ){ - for( int l = 0; l < numBlocksY; l++ ){ - std::cout<< BlockIterHost[ l*numBlocksX + k ]; + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + + for( int i = 0; i < BlockIterHost.getSize(); i++ ){ + if( 
IsCalculationDone == 0 ){ + IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; + //break; + } } - std::cout<updateBlocks( interfaceMap, aux, BlockIterHost, numThreadsPerBlock); + numWhile++; + + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl; this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY ); - //Reduction - for( int k = numBlocksX-1; k >-1; k-- ){ - for( int l = 0; l < numBlocksY; l++ ){ - //std::cout<< BlockIterHost[ l*numBlocksX + k ]; - BlockIterHost[ 0 ] = BlockIterHost[ 0 ] || BlockIterHost[ l*numBlocksX + k ]; - } - //std::cout<-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << "BlockIterHost = "<< j*numBlocksX + i<< " ," << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + //Reduction + //std::cout<getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } + if( numWhile == 1 ){ + auxPtr = helpFunc; } + /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + delete []sArray[i];*/ - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-3.tnl" ); + /*for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-2.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + for( int j = 0; + j < mesh->getDimensions().y(); + j++ ) + { + for( int i = 0; + i < mesh->getDimensions().x(); + i++ ) + { + std::cout << aux[ i * mesh->getDimensions().y() + j ] << " "; + } + std::cout << std::endl; + }*/ - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - }*/ } if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code #ifdef HAVE_CUDA -<<<<<<< HEAD TNL_CHECK_CUDA_DEVICE; + // Maximum cudaBlockSite is 32. Because of maximum num. of threads in kernel. 
const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); @@ -203,19 +259,14 @@ solve( const MeshPointer& mesh, BlockIterDevice.setSize( numBlocksX * numBlocksY ); BlockIterDevice.setValue( 1 ); TNL_CHECK_CUDA_DEVICE; - int ne = 0; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, ne); - TNL_CHECK_CUDA_DEVICE; + /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; - BlockIterPom.setSize( numBlocksX * numBlocksY ); - BlockIterPom.setValue( 0 );*/ + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 );*/ /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); @@ -224,139 +275,125 @@ solve( const MeshPointer& mesh, /*int *BlockIterPom; cudaMalloc((void**) &BlockIterPom, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ - int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); + int nBlocks = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 
1:0); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); + dBlock.setSize( nBlocks ); TNL_CHECK_CUDA_DEVICE; /*int *dBlock; cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ - //int pocIter = 0; + + + MeshFunctionPointer helpFunc1; + helpFunc1->setMesh(mesh); + + MeshFunctionPointer helpFunc( mesh ); + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + + int numIter = 0; + + //int oddEvenBlock = 0; while( BlockIterD ) { - /*BlockIterPom1 = BlockIterDevice; - for( int j = numBlocksY-1; j>-1; j-- ){ - for( int i = 0; i < numBlocksX; i++ ) - std::cout << BlockIterPom1[ j * numBlocksX + i ]; - std::cout << std::endl; - } - std::cout << std::endl;*/ + /** HERE IS CHESS METHOD **/ - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, 1); + /*auxPtr = helpFunc; + + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; - /*int poc = 0; - for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - if( BlockIterPom1[ i ] ) - poc = poc+1; - std::cout << "pocet bloku, ktere se pocitali = " << poc << std::endl;*/ + oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; - GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, /*BlockIterPom,*/ numBlocksX, numBlocksY ); + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - - BlockIterD = dBlock.getElement( 0 ); - //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + BlockIterD = dBlock.getElement( 0 );*/ + + /**------------------------------------------------------------------------------------------------*/ + + + /** HERE IS FIM **/ + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + + //int pocBloku = 0; + Devices::Cuda::synchronizeDevice(); + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; + + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, numBlocksX, numBlocksY ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout<< "Probehlo" << std::endl; + + //TNL::swap( auxPtr, helpFunc ); + + + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + 
TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + TNL_CHECK_CUDA_DEVICE; + + + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + + /**-----------------------------------------------------------------------------------------------------------*/ /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - //pocIter ++; -======= - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY ); - - tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - - TNL::Containers::Array< int, Devices::Host, IndexType > BlockIter; - BlockIter.setSize( numBlocksX * numBlocksY ); - BlockIter.setValue( 0 ); - /*int* BlockIter = (int*)malloc( ( numBlocksX * numBlocksY ) * sizeof( int ) ); - for( int i = 0; i < numBlocksX*numBlocksY +1; i++) - BlockIter[i] = 1;*/ - - int BlockIterD = 1; - - TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; - BlockIterDevice.setSize( numBlocksX * numBlocksY ); - BlockIterDevice.setValue( 1 ); - /*int *BlockIterDevice; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) ); - cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice);*/ - - int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); - - TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); - dBlock.setValue( 0 ); - /*int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ - - while( BlockIterD ) - { - /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ i ] = false;*/ - - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - BlockIter = BlockIterDevice; - //cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyDeviceToHost); - GetNeighbours( BlockIter, numBlocksX, numBlocksY ); - - BlockIterDevice = BlockIter; - //cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - CudaParallelReduc<<< nBlocks, 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - - /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - - } - /*cudaFree( BlockIterDevice ); - cudaFree( dBlock ); - delete BlockIter;*/ - cudaDeviceSynchronize(); - - TNL_CHECK_CUDA_DEVICE; - - aux = *auxPtr; - interfaceMap = *interfaceMapPtr; -#endif ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + numIter ++; } + if( numIter == 1 ){ + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + } + /*cudaFree( BlockIterDevice ); + cudaFree( dBlock ); + delete BlockIter;*/ cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //std::cout<< pocIter << std::endl; + TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; interfaceMap = *interfaceMapPtr; @@ -366,12 
+403,14 @@ solve( const MeshPointer& mesh, } aux.save("aux-final.tnl"); } -<<<<<<< HEAD + #ifdef HAVE_CUDA + + template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; @@ -381,103 +420,68 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index int m=0, k=0; m = i%numBlockX; k = i/numBlockX; - if( m > 0 ) - if( BlockIterDevice[ i - 1 ] ) - pom = 1;//BlockIterPom[ i ] = 1; - if( m < numBlockX -1 && pom == 0 ) - if( BlockIterDevice[ i + 1 ] ) - pom = 1;//BlockIterPom[ i ] = 1; - if( k > 0 && pom == 0 ) - if( BlockIterDevice[ i - numBlockX ] ) - pom = 1;// BlockIterPom[ i ] = 1; - if( k < numBlockY -1 && pom == 0 ) - if( BlockIterDevice[ i + numBlockX ] ) - pom = 1;//BlockIterPom[ i ] = 1; + if( m > 0 && BlockIterDevice[ i - 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterDevice[ i + 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterDevice[ i - numBlockX ] ){ + pom = 1;// BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterDevice[ i + numBlockX ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + } - - BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; } } -======= -template < typename Index > -void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ) -{ - TNL::Containers::Array< int, Devices::Host, Index > BlockIterPom; - BlockIterPom.setSize( numBlockX * numBlockY ); - BlockIterPom.setValue( 0 ); - /*int* BlockIterPom; - BlockIterPom = new int[numBlockX * numBlockY];*/ - /*for(int i = 0; i < numBlockX * numBlockY; i++) - BlockIterPom[ i ] = 0;*/ - for(int i = 0; i < numBlockX * numBlockY; i++) - { - - if( BlockIter[ 
i ] ) - { - // i = k*numBlockY + m; - int m=0, k=0; - m = i%numBlockY; - k = i/numBlockY; - if( k > 0 && numBlockY > 1 ) - BlockIterPom[i - numBlockX] = 1; - if( k < numBlockY-1 && numBlockY > 1 ) - BlockIterPom[i + numBlockX] = 1; - - if( m >= 0 && m < numBlockX - 1 && numBlockX > 1 ) - BlockIterPom[ i+1 ] = 1; - if( m <= numBlockX -1 && m > 0 && numBlockX > 1 ) - BlockIterPom[ i-1 ] = 1; - } - } - for(int i = 0; i < numBlockX * numBlockY; i++ ){ -/// if( !BlockIter[ i ] ) - BlockIter[ i ] = BlockIterPom[ i ]; -/// else -/// BlockIter[ i ] = 0; - } - /*for( int i = numBlockX-1; i > -1; i-- ) - { - for( int j = 0; j< numBlockY; j++ ) - std::cout << BlockIter[ i*numBlockY + j ]; - std::cout << std::endl; - } - std::cout << std::endl;*/ - //delete[] BlockIterPom; -} - -#ifdef HAVE_CUDA ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { -<<<<<<< HEAD int i = threadIdx.x; int blId = blockIdx.x; - __shared__ volatile int sArray[ 512 ]; + int blockSize = blockDim.x; + /*if ( i == 0 && blId == 0 ){ + printf( "nBlocks = %d \n", nBlocks ); + for( int j = nBlocks-1; j > -1 ; j--){ + printf( "cislo = %d \n", BlockIterDevice[ j ] ); + } + }*/ + __shared__ int sArray[ 1024 ]; sArray[ i ] = 0; - if(blId * 512 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + if( blId * 1024 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 1024 + i ]; __syncthreads(); - if (blockDim.x == 1024) { + /*extern __shared__ volatile int sArray[]; + unsigned int i = threadIdx.x; + unsigned int gid = blockIdx.x * blockSize * 2 + threadIdx.x; + unsigned int gridSize = blockSize * 2 * gridDim.x; + sArray[ i ] = 0; + while( gid < nBlocks ) + { + sArray[ i ] += BlockIterDevice[ gid ] + BlockIterDevice[ 
gid + blockSize ]; + gid += gridSize; + } + __syncthreads();*/ + + if ( blockSize == 1024) { if (i < 512) sArray[ i ] += sArray[ i + 512 ]; } __syncthreads(); - if (blockDim.x >= 512) { + if (blockSize >= 512) { if (i < 256) { sArray[ i ] += sArray[ i + 256 ]; } } - if (blockDim.x >= 256) { + __syncthreads(); + if (blockSize >= 256) { if (i < 128) { sArray[ i ] += sArray[ i + 128 ]; } } __syncthreads(); - if (blockDim.x >= 128) { + if (blockSize >= 128) { if (i < 64) { sArray[ i ] += sArray[ i + 64 ]; } @@ -485,183 +489,120 @@ __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, I __syncthreads(); if (i < 32 ) { - if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + if( blockSize >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockSize >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockSize >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockSize >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockSize >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockSize >= 2 ) sArray[ i ] += sArray[ i + 1 ]; } if( i == 0 ) dBlock[ blId ] = sArray[ 0 ]; -======= - int i = threadIdx.x; - int blId = blockIdx.x; - /*if ( i == 0 && blId == 0 ){ - printf( "nBlocks = %d \n", nBlocks ); - for( int j = nBlocks-1; j > -1 ; j--){ - printf( "cislo = %d \n", BlockIterDevice[ j ] ); - } - }*/ - __shared__ volatile int sArray[ 512 ]; - sArray[ i ] = 0; - if( blId * 512 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; - __syncthreads(); - - if (blockDim.x == 1024) { - if (i < 512) - sArray[ i ] += sArray[ i + 512 ]; - } - __syncthreads(); - if (blockDim.x >= 512) { - if (i < 256) { - sArray[ i ] += sArray[ i + 256 ]; - } - } - __syncthreads(); - if (blockDim.x >= 256) { 
- if (i < 128) { - sArray[ i ] += sArray[ i + 128 ]; - } - } - __syncthreads(); - if (blockDim.x >= 128) { - if (i < 64) { - sArray[ i ] += sArray[ i + 64 ]; - } - } - __syncthreads(); - if (i < 32 ) - { - if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; - } - - if( i == 0 ) - dBlock[ blId ] = sArray[ 0 ]; ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf } -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, -<<<<<<< HEAD - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne ) -======= - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; - int grIdx = gridDim.x; - - if( BlockIterDevice[ blIdy * grIdx + blIdx] ) + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + /** FOR CHESS METHOD */ + if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) { - 
- const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + /**-----------------------------------------*/ - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ int xkolik; - __shared__ int ykolik; - __shared__ volatile int NE; - if( thri == 0 && thrj == 0 ) + + /** FOR FIM METHOD */ + /*if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x] ) + {*/ + /**-----------------------------------------*/ + const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + __shared__ volatile int dimX; + __shared__ volatile int dimY; + __shared__ volatile Real hx; + __shared__ volatile Real hy; + if( thri==0 && thrj == 0) { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); - - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - BlockIterDevice[ blIdy * grIdx + blIdx ] = 0; - NE = ne; + dimX = mesh.getDimensions().x(); + dimY = mesh.getDimensions().y(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); + BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] = 0; } __syncthreads(); - - int i = thri + blockDim.x*blIdx; - int j = blockDim.y*blIdy + thrj; + int numOfBlockx; + int numOfBlocky; + int xkolik; + int ykolik; + + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); + numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + + if( numOfBlockx - 1 == blockIdx.x ) + xkolik = dimX - (blockIdx.x)*blockDim.x+1; + + if( numOfBlocky -1 == blockIdx.y ) + ykolik = dimY - (blockIdx.y)*blockDim.y+1; + __syncthreads(); + int currentIndex = thrj * blockDim.x + thri; - if( BlockIterDevice[ blIdy * gridDim.x + blIdx] ) - { //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; - __shared__ volatile bool changed[16*16]; + __shared__ volatile bool changed[ (sizeSArray-2)*(sizeSArray-2)]; changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 ) changed[ 0 ] = true; - __shared__ Real hx; - __shared__ Real hy; - if( thrj == 1 && thri == 1 ) - { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); - } //__shared__ volatile Real sArray[ blockDim.y+2 ][ blockDim.x+2 ]; - __shared__ volatile Real sArray[18][18]; - sArray[thrj][thri] = std::numeric_limits< Real >::max(); + __shared__ volatile Real sArray[ sizeSArray * sizeSArray ]; + sArray[ thrj * sizeSArray + thri ] = std::numeric_limits< Real >::max(); //filling sArray edges if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && NE == 1 ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + { + if( dimX > (blockIdx.x+1) * blockDim.x && thrj+1 < ykolik ) + sArray[(thrj+1)*sizeSArray + xkolik] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + sArray[(thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { - if( blIdx != 0 && thrj+1 < ykolik && NE == 1 ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + if( blockIdx.x != 0 && thrj+1 < ykolik ) + sArray[(thrj+1)*sizeSArray + 0] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + (thrj+1)*dimX ]; else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + 
sArray[(thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max(); } -<<<<<<< HEAD if( thri == 2 ) { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik && NE == 1 ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + if( dimY > (blockIdx.y+1) * blockDim.y && thrj+1 < xkolik ) + sArray[ ykolik*sizeSArray + thrj+1 ] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); -======= - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - //BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + sArray[ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); + } -<<<<<<< HEAD if( thri == 3 ) { - if( blIdy != 0 && thrj+1 < xkolik && NE == 1 ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + if( blockIdx.y != 0 && thrj+1 < xkolik ) + sArray[0*sizeSArray + thrj+1] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + thrj+1 ]; else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + sArray[0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + if( i < dimX && j < dimY ) { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + sArray[(thrj+1)*sizeSArray + thri+1] = aux[ j*dimX + i ]; } __syncthreads(); @@ -672,25 +613,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< changed[ currentIndex] = false; //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + if( i < dimX && j < dimY ) { - if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) -======= - if(thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; - - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + if( ! interfaceMap[ j * dimX + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); } } __syncthreads(); @@ -724,12 +651,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { if( currentIndex < 64 ) { -<<<<<<< HEAD changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; } } __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + if( currentIndex < 32 ) { if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; @@ -738,82 +664,23 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * grIdx + blIdx ] = 1; + if( thri == 0 && thrj == 0 && changed[ 0 ] ){ + BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] = 1; + } + /*if( thri==0 && thrj == 0 && blockIdx.x == 0 && blockIdx.y == 0 ) + { + for( int k = 15; k>-1; k-- ){ + for( int l = 0; l < 16; l++ ) + printf( "%f\t", sArray[k * 16 + l]); + printf( "\n"); + } + printf( "\n"); + }*/ 
__syncthreads(); } + if( i < dimX && j < dimY ) + helpFunc[ j * dimX + i ] = sArray[ ( thrj + 1 ) * sizeSArray + thri + 1 ]; - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; - } -======= - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) - { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); - } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; - } - } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || 
changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ){ - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - } - __syncthreads(); - } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; - } - /*if( thri == 0 && thrj == 0 ) - printf( "Block ID = %d, value = %d \n", (blIdy * numOfBlockx + blIdx), BlockIterDevice[ blIdy * numOfBlockx + blIdx ] );*/ ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf + } } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 8d71bfe06..4daf9fc92 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -280,17 +280,12 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); -<<<<<<< HEAD cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; -======= - //CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - //CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); ->>>>>>> da336fb8bd927bc927bde8bde5876b18f07a23cf CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaDeviceSynchronize(); -- GitLab From 04c3e8bc96332c62efd318cb971193cfb274c490 Mon Sep 17 00:00:00 2001 From: Fencl Date: Wed, 31 Oct 2018 06:44:59 +0100 Subject: [PATCH 05/20] Repair of last 
commit (error for - wihtout cuda): FIM method implemented for 2D GPU and FIM-FSM implemented for 2D CPU (parallel). --- .../tnlDirectEikonalMethodsBase_impl.h | 119 +++++++++--------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 95971c9b8..500d1bf03 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -11,6 +11,7 @@ #include #include "tnlFastSweepingMethod.h" +#include "tnlDirectEikonalMethodsBase.h" template< typename Real, typename Device, @@ -135,7 +136,7 @@ updateBlocks( InterfaceMapType interfaceMap, bool changed = false; - RealType *sArray; + Real *sArray; sArray = new Real[ sizeSArray * sizeSArray ]; if( sArray == nullptr ) std::cout << "Error while allocating memory for sArray." << std::endl; @@ -175,7 +176,7 @@ updateBlocks( InterfaceMapType interfaceMap, //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - pom = this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + pom = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); changed = changed || pom; } } @@ -195,7 +196,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); } } } @@ -213,7 +214,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); } } } @@ -231,7 +232,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx, hy, 1.0); } } } @@ -258,7 +259,7 @@ updateBlocks( InterfaceMapType interfaceMap, } //std::cout< +template< int sizeSArray > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, + const Real v ) +{ + const RealType value = sArray[ thrj * sizeSArray + thri ]; + RealType a, b, tmp = std::numeric_limits< RealType >::max(); + + b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], + sArray[ (thrj-1) * sizeSArray + thri ] ); + + a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], + sArray[ thrj * sizeSArray + thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 
3 ] + pom[ 4 ] * pom[ 4 ] ); + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + + return false; +} #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -1133,58 +1185,7 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 } -template< typename Real, - typename Device, - typename Index > -template< int sizeSArray > -__cuda_callable__ -bool -tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: -updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, - const Real v ) -{ - const RealType value = sArray[ thrj * sizeSArray + thri ]; - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], - sArray[ (thrj-1) * sizeSArray + thri ] ); - - a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], - sArray[ thrj * sizeSArray + thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj * sizeSArray + thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ 
thrj * sizeSArray + thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - - return false; -} + template< typename Real, typename Device, -- GitLab From b95d6c7b98234dd1e93f245e0d2076ac91ba14bc Mon Sep 17 00:00:00 2001 From: Fencl Date: Thu, 1 Nov 2018 16:26:36 +0100 Subject: [PATCH 06/20] Last repair of FIM for GPU. --- .../tnlDirectEikonalMethodsBase.h | 2 +- .../tnlDirectEikonalMethodsBase_impl.h | 72 ++++---- .../tnlFastSweepingMethod2D_impl.h | 165 ++++++++++-------- 3 files changed, 125 insertions(+), 114 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index cbb1a1ff6..ccbae8abe 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -148,7 +148,7 @@ __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, I template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 500d1bf03..5083544e2 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ 
b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -134,6 +134,7 @@ updateBlocks( InterfaceMapType interfaceMap, Real hy = mesh.getSpaceSteps().y(); bool changed = false; + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; Real *sArray; @@ -143,53 +144,52 @@ updateBlocks( InterfaceMapType interfaceMap, for( int thri = 0; thri < sizeSArray; thri++ ){ for( int thrj = 0; thrj < sizeSArray; thrj++ ) - sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); + sArray[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); } - BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) - sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + sArray[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; if( blIdx != 0 && thrj+1 < ykolik ) - sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + sArray[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) - sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + sArray[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; if( blIdy != 0 && thrj+1 < xkolik ) - sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; + sArray[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; } for( int k = 0; k < 
numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) - sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + sArray[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; } - bool pom = false; + for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - pom = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); - changed = changed || pom; + changed = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy) || changed; + } } } } /*aux.save( "aux-1pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ - + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) @@ -201,12 +201,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-2pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ for( int k 
= numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { @@ -219,12 +219,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-3pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { @@ -238,12 +238,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-4pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ if( changed ){ @@ -254,7 +254,7 @@ updateBlocks( InterfaceMapType interfaceMap, for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) - helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; + helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ if( IsCalculationDone == 0 ){ IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; @@ -130,6 +131,7 @@ solve( const MeshPointer& mesh, } } numWhile++; + std::cout 
<<"numWhile = "<< numWhile <-1; j-- ){ for( int i = 0; i < numBlocksX; i++ ) @@ -146,7 +148,6 @@ solve( const MeshPointer& mesh, std::cout << std::endl; } std::cout << std::endl;*/ - //Reduction //std::cout<updateCell( aux, cell ); } - } + } //aux.save( "aux-1.tnl" ); @@ -261,12 +262,12 @@ solve( const MeshPointer& mesh, TNL_CHECK_CUDA_DEVICE; - /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; - BlockIterPom.setSize( numBlocksX * numBlocksY ); - BlockIterPom.setValue( 0 );*/ + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 ); /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); @@ -284,9 +285,7 @@ solve( const MeshPointer& mesh, cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ - MeshFunctionPointer helpFunc1; - helpFunc1->setMesh(mesh); - + MeshFunctionPointer helpFunc1( mesh ); MeshFunctionPointer helpFunc( mesh ); helpFunc1 = auxPtr; @@ -301,83 +300,94 @@ solve( const MeshPointer& mesh, /** HERE IS CHESS METHOD **/ /*auxPtr = helpFunc; + + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; + + oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; + + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; + + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + BlockIterD = dBlock.getElement( 0 );*/ + + /**------------------------------------------------------------------------------------------------*/ + + + /** HERE IS FIM **/ + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + TNL_CHECK_CUDA_DEVICE; + + //int pocBloku = 0; + Devices::Cuda::synchronizeDevice(); CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), - auxPtr.template getData< Device>(), + auxPtr.template modifyData< Device>(), helpFunc.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); + BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - auxPtr = helpFunc; - oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; + //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; + //BlockIterPom1 = BlockIterDevice; + ///for( int i =0; i< numBlocksX; i++ ){ + // for( int j = 0; j < numBlocksY; j++ ) + // { + // std::cout << BlockIterPom1[j*numBlocksX + i]; + // } + // std::cout << std::endl; + //} + //std::cout << std::endl; - CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template getData< Device>(), - helpFunc.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - auxPtr = helpFunc; + BlockIterDevice = BlockIterPom; + + //std::cout<< "Probehlo" << std::endl; + + //TNL::swap( auxPtr, helpFunc ); - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - BlockIterD = dBlock.getElement( 0 );*/ - /**------------------------------------------------------------------------------------------------*/ + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; - /** HERE IS FIM **/ - - helpFunc1 = auxPtr; - auxPtr = helpFunc; - helpFunc = helpFunc1; - - //int pocBloku = 0; - Devices::Cuda::synchronizeDevice(); - CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - helpFunc.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; - - GetNeighbours<<< nBlocksNeigh, 1024 >>>( 
BlockIterDevice, numBlocksX, numBlocksY ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //std::cout<< "Probehlo" << std::endl; - - //TNL::swap( auxPtr, helpFunc ); - - - CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - TNL_CHECK_CUDA_DEVICE; - - - BlockIterD = dBlock.getElement( 0 ); - //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - /**-----------------------------------------------------------------------------------------------------------*/ /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ @@ -392,7 +402,6 @@ solve( const MeshPointer& mesh, cudaFree( dBlock ); delete BlockIter;*/ cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; @@ -410,7 +419,7 @@ solve( const MeshPointer& mesh, template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; @@ -430,7 +439,7 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index pom = 1;//BlockIterPom[ i ] = 1; } - BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; + BlockIterPom[ i ] = pom;//BlockIterPom[ i ]; } } @@ -514,14 +523,16 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; /** FOR CHESS METHOD */ - if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) - { - /**-----------------------------------------*/ - + //if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) 
+ //{ + /**------------------------------------------*/ + + + /** FOR FIM METHOD */ - /** FOR FIM METHOD */ - /*if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x] ) - {*/ + if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] ) + { + __syncthreads(); /**-----------------------------------------*/ const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ volatile int dimX; -- GitLab From 0dcd35d5f5d5445a7ecb05cf8b9522a0cf29bca8 Mon Sep 17 00:00:00 2001 From: Fencl Date: Mon, 5 Nov 2018 14:43:21 +0100 Subject: [PATCH 07/20] FIM implemented in 3D --- .../tnlDirectEikonalMethodsBase.h | 10 +- .../tnlFastSweepingMethod2D_impl.h | 16 +- .../tnlFastSweepingMethod3D_impl.h | 881 +++++++++--------- 3 files changed, 478 insertions(+), 429 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index ccbae8abe..7d990c1bb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -160,11 +160,17 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + const Functions::MeshFunction< Meshes::Grid< 3, Real, 
Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + +template < typename Index > +__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, + int numBlockX, int numBlockY, int numBlockZ ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index bc82b7a2c..fa2716897 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -85,7 +85,7 @@ solve( const MeshPointer& mesh, { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 1024; + int numThreadsPerBlock = 16; int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 
1:0); @@ -115,13 +115,13 @@ solve( const MeshPointer& mesh, } std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ @@ -394,9 +394,7 @@ solve( const MeshPointer& mesh, numIter ++; } if( numIter == 1 ){ - helpFunc1 = auxPtr; auxPtr = helpFunc; - helpFunc = helpFunc1; } /*cudaFree( BlockIterDevice ); cudaFree( dBlock ); @@ -535,10 +533,10 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< __syncthreads(); /**-----------------------------------------*/ const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); - __shared__ volatile int dimX; - __shared__ volatile int dimY; - __shared__ volatile Real hx; - __shared__ volatile Real hy; + __shared__ int dimX; + __shared__ int dimY; + __shared__ Real hx; + __shared__ Real hy; if( thri==0 && thrj == 0) { dimX = mesh.getDimensions().x(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 4daf9fc92..65aba5bf5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -16,511 +16,556 @@ #include "tnlFastSweepingMethod.h" template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename 
Index, + typename Anisotropy > const Index& FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: getMaxIterations() const { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: setMaxIterations( const IndexType& maxIterations ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ) + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ) { - MeshFunctionPointer auxPtr; - InterfaceMapPointer interfaceMapPtr; - auxPtr->setMesh( mesh ); - interfaceMapPtr->setMesh( mesh ); - std::cout << "Initiating the interface cells ..." << std::endl; - BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + MeshFunctionPointer auxPtr; + InterfaceMapPointer interfaceMapPtr; + auxPtr->setMesh( mesh ); + interfaceMapPtr->setMesh( mesh ); + std::cout << "Initiating the interface cells ..." 
<< std::endl; + BaseType::initInterface( u, auxPtr, interfaceMapPtr ); #ifdef HAVE_CUDA - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #endif - auxPtr->save( "aux-ini.tnl" ); - - typename MeshType::Cell cell( *mesh ); - - IndexType iteration( 0 ); - MeshFunctionType aux = *auxPtr; - InterfaceMapType interfaceMap = * interfaceMapPtr; - while( iteration < this->maxIterations ) + auxPtr->save( "aux-ini.tnl" ); + + typename MeshType::Cell cell( *mesh ); + + IndexType iteration( 0 ); + MeshFunctionType aux = *auxPtr; + InterfaceMapType interfaceMap = * interfaceMapPtr; + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) { - if( std::is_same< DeviceType, Devices::Host >::value ) + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } } - if( std::is_same< DeviceType, Devices::Cuda >::value ) + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) { - // TODO: CUDA code -#ifdef HAVE_CUDA - const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize ); - if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) - std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; - dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - - tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; - - - int BlockIterD = 1; - - TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; - BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); - BlockIterDevice.setValue( 1 ); - /*int *BlockIterDevice; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ - int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 
1:0); - - TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); - dBlock.setValue( 0 ); - /*int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ - - while( BlockIterD ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) { - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - - /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); } - //cudaFree( BlockIterDevice ); - //cudaFree( dBlock ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - aux = *auxPtr; - interfaceMap = *interfaceMapPtr; -#endif + } } - - //aux.save( "aux-8.tnl" ); - iteration++; + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + // TODO: CUDA code +#ifdef HAVE_CUDA + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; + + + int BlockIterD = 1; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterDevice.setValue( 1 ); + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterPom.setValue( 0 ); + /*int *BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ + int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 1:0); - } - aux.save("aux-final.tnl"); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + + int nBlocksNeigh = ( numBlocksX * numBlocksY * numBlocksZ )/1024 + ((( numBlocksX * numBlocksY * numBlocksZ )%1024 != 0) ? 
1:0); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ + MeshFunctionPointer helpFunc1( mesh ); + MeshFunctionPointer helpFunc( mesh ); + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + int numIter = 0; + + while( BlockIterD ) + { + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + TNL_CHECK_CUDA_DEVICE; + + CudaUpdateCellCaller< 10 ><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + BlockIterDevice = BlockIterPom; + + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + numIter++; + /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ + + } + if( numIter == 1 ){ + auxPtr = helpFunc; + } + //cudaFree( BlockIterDevice ); + //cudaFree( dBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif + } + + //aux.save( "aux-8.tnl" ); + iteration++; + + } + aux.save("aux-final.tnl"); } #ifdef HAVE_CUDA -template < typename Real, typename Device, typename Index > +template < typename Index > +__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, + int numBlockX, int numBlockY, int numBlockZ ) +{ + int i = blockIdx.x * 1024 + threadIdx.x; + 
+ if( i < numBlockX * numBlockY * numBlockZ ) + { + int pom = 0;//BlockIterPom[ i ] = 0; + int m=0, l=0, k=0; + l = i/( numBlockX * numBlockY ); + k = (i-l*numBlockX * numBlockY )/(numBlockX ); + m = (i-l*numBlockX * numBlockY )%( numBlockX ); + if( m > 0 && BlockIterDevice[ i - 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterDevice[ i + 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterDevice[ i - numBlockX ] ){ + pom = 1;// BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterDevice[ i + numBlockX ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( l > 0 && BlockIterDevice[ i - numBlockX*numBlockY ] ){ + pom = 1; + }else if( l < numBlockZ-1 && BlockIterDevice[ i + numBlockX*numBlockY ] ){ + pom = 1; + } + + BlockIterPom[ i ] = pom;//BlockIterPom[ i ]; + } +} + +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { - int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - int k = blockDim.z*blockIdx.z + threadIdx.z; - int currentIndex = thrk * blockDim.x * blockDim.y + thrj * blockDim.x + thri; + int thri = 
threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; + int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + int currentIndex = thrk * blockDim.x * blockDim.y + thrj * blockDim.x + thri; + + if( BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] ) + { + __syncthreads(); - __shared__ volatile bool changed[8*8*8]; - changed[ currentIndex ] = false; + __shared__ volatile bool changed[ (sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)]; + changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 && thrk == 0 ) - changed[ 0 ] = true; + changed[ 0 ] = true; const Meshes::Grid< 3, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); - __shared__ Real hx; - __shared__ Real hy; - __shared__ Real hz; + __shared__ Real hx; __shared__ int dimX; + __shared__ Real hy; __shared__ int dimY; + __shared__ Real hz; __shared__ int dimZ; + if( thrj == 1 && thri == 1 && thrk == 1 ) { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); - hz = mesh.getSpaceSteps().z(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); + hz = mesh.getSpaceSteps().z(); + dimX = mesh.getDimensions().x(); + dimY = mesh.getDimensions().y(); + dimZ = mesh.getDimensions().z(); + BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0; } - __shared__ volatile Real sArray[10][10][10]; - sArray[thrk][thrj][thri] = std::numeric_limits< Real >::max(); - if(thri == 0 ) - { - sArray[8][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); - sArray[thrk+1][thrj+1][8] = std::numeric_limits< Real >::max(); - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); - sArray[thrj+1][8][thrk+1] = std::numeric_limits< Real >::max(); - 
sArray[thrj+1][9][thrk+1] = std::numeric_limits< Real >::max(); - } - + __shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray]; + sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max(); + //filling sArray edges - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - int dimZ = mesh.getDimensions().z(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ volatile int numOfBlockz; - __shared__ int xkolik; - __shared__ int ykolik; - __shared__ int zkolik; - if( thri == 0 && thrj == 0 && thrk == 0 ) - { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - zkolik = blockDim.z + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); - numOfBlockz = dimZ/blockDim.z + ((dimZ%blockDim.z != 0) ? 1:0); - - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - if( numOfBlockz-1 == blIdz ) - zkolik = dimZ - (blIdz)*blockDim.z+1; - - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 0; - } + int numOfBlockx; + int numOfBlocky; + int numOfBlockz; + int xkolik; + int ykolik; + int zkolik; + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + zkolik = blockDim.z + 1; + numOfBlockx = gridDim.x; + numOfBlocky = gridDim.y; + numOfBlockz = gridDim.z; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*blockDim.x+1; + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*blockDim.y+1; + if( numOfBlockz-1 == blIdz ) + zkolik = dimZ - (blIdz)*blockDim.z+1; __syncthreads(); if( thri == 0 ) { - if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; - else - sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); + if( blIdx != 0 && 
thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; + else + sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; - else - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; + else + sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { - if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; - else - sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); + if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; + else + sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 3 ) { - if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; - else - sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); + if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; + else + 
sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 4 ) { - if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; - else - sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; + else + sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); } if( thri == 5 ) { - if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; - else - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; + else + sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); } - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + if( i < dimX && j < dimY && k < dimZ ) { - sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; + sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; } - __shared__ volatile int loopcounter; - loopcounter = 0; __syncthreads(); while( changed[ 0 ] ) { - __syncthreads(); - - changed[ currentIndex ] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) - { - if( ! 
interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) - { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); - } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y*blockDim.z == 1024 ) - { - if( currentIndex < 512 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 512 ) + __syncthreads(); + + changed[ currentIndex ] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) + { + if( ! interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) { - if( currentIndex < 256 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; - } + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 256 ) + } + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y*blockDim.z == 1024 ) + { + if( currentIndex < 512 ) { - if( currentIndex < 128 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; - } + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 128 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 512 ) + { + if( currentIndex < 256 ) { - if( currentIndex < 64 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; - } + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 256 ) + { + if( currentIndex < 128 ) { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( 
currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; } - __syncthreads(); - - /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) - { - for(int m = 0; m < 8; m++){ - for(int n = 0; n<8; n++){ - for(int b=0; b<8; b++) - printf(" %i ", changed[m*64 + n*8 + b]); - printf("\n"); - } - printf("\n \n"); - } - }*/ - if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 128 ) + { + if( currentIndex < 64 ) { - //loopcounter++; - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; } - __syncthreads(); - /*if(thri == 0 && thrj==0 && thrk==0) - printf("%i \n",loopcounter); - if(loopcounter == 500) - break;*/ + } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ 
currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + __syncthreads(); + + /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) + { + for(int m = 0; m < 8; m++){ + for(int n = 0; n<8; n++){ + for(int b=0; b<8; b++) + printf(" %i ", changed[m*64 + n*8 + b]); + printf("\n"); + } + printf("\n \n"); + } + }*/ + if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) + { + BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + } + __syncthreads(); } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ && (!interfaceMap[ k*dimX*dimY+j * mesh.getDimensions().x() + i ]) ) - aux[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; -} + + if( i < dimX && j < dimY && k < dimZ ) + helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; + } +} #endif -- GitLab From 98abe9f62cc46968797cec710434d55f456695e0 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 15 Nov 2018 13:24:51 +0100 Subject: [PATCH 08/20] Enabled computations with single precision. --- .../Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h index f8f9187fa..a2a1d7372 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h @@ -23,7 +23,7 @@ namespace Solvers { /**** * Turn off support for float and long double. 
*/ -template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = false }; }; +template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = true }; }; template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, long double > { enum { enabled = false }; }; /**** -- GitLab From f206b754c5a76ec352ae91e3c3286acc0b27d512 Mon Sep 17 00:00:00 2001 From: Fencl Date: Fri, 16 Nov 2018 12:03:40 +0100 Subject: [PATCH 09/20] 3D FSM+FIM implemented 2D FSM+FIM method pickes size of rectangular block depending on number of blocks --- .../tnlDirectEikonalMethodsBase.h | 214 ++++---- .../tnlDirectEikonalMethodsBase_impl.h | 519 +++++++++++++++--- .../hamilton-jacobi/tnlFastSweepingMethod.h | 222 ++++---- .../tnlFastSweepingMethod2D_impl.h | 74 ++- .../tnlFastSweepingMethod3D_impl.h | 455 +++++++++------ 5 files changed, 1004 insertions(+), 480 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 7d990c1bb..f712ce2cc 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -19,102 +19,112 @@ class tnlDirectEikonalMethodsBase }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< 
InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - __cuda_callable__ bool updateCell( volatile Real sArray[18], - int thri, const Real h, - const Real velocity = 1.0 ); + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + __cuda_callable__ bool updateCell( volatile Real sArray[18], + int thri, const Real h, + const Real velocity = 1.0 ); }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - public: - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; - typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const 
MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - template< int sizeSArray > - __cuda_callable__ bool updateCell( volatile Real *sArray, - int thri, int thrj, const Real hx, const Real hy, - const Real velocity = 1.0 ); - - template< int sizeSArray > - void updateBlocks( InterfaceMapType interfaceMap, - MeshFunctionType aux, - MeshFunctionType helpFunc, - ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); - - void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); + public: + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell( volatile Real *sArray, + int thri, int thrj, const Real hx, const Real hy, + const Real velocity = 1.0 ); + + template< int sizeSArray > + void updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; 
template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - public: - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0); - - __cuda_callable__ bool updateCell( volatile Real sArray[10][10][10], - int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, - const Real velocity = 1.0 ); + public: + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0); + + template< int sizeSArray > + void updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + 
MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell3D( volatile Real *sArray, + int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, + const Real velocity = 1.0 ); }; template < typename T1, typename T2 > @@ -126,46 +136,46 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, - bool *BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, + bool *BlockIterDevice ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - 
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); + 
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 5083544e2..8f7937541 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -148,6 +148,7 @@ updateBlocks( InterfaceMapType interfaceMap, } + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) @@ -263,6 +264,370 @@ updateBlocks( InterfaceMapType interfaceMap, } } } +template< typename Real, + typename Device, + typename Index > +template< int 
sizeSArray > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) +{ +//#pragma omp parallel for schedule( dynamic ) + for( int i = 0; i < BlockIterHost.getSize(); i++ ) + { + if( BlockIterHost[ i ] ) + { + MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + int dimZ = mesh.getDimensions().z(); + //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; + int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockz = dimZ/numThreadsPerBlock + ((dimZ%numThreadsPerBlock != 0) ? 1:0); + //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; + int xkolik = numThreadsPerBlock + 1; + int ykolik = numThreadsPerBlock + 1; + int zkolik = numThreadsPerBlock + 1; + + + int blIdz = i/( numOfBlockx * numOfBlocky ); + int blIdy = (i-blIdz*numOfBlockx * numOfBlocky )/(numOfBlockx ); + int blIdx = (i-blIdz*numOfBlockx * numOfBlocky )%( numOfBlockx ); + //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*numThreadsPerBlock+1; + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*numThreadsPerBlock+1; + if( numOfBlockz-1 == blIdz ) + zkolik = dimZ - (blIdz)*numThreadsPerBlock+1; + //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; + + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; + changed[ 0 ] = 1;*/ + Real hx = mesh.getSpaceSteps().x(); + Real hy = mesh.getSpaceSteps().y(); + Real hz = mesh.getSpaceSteps().z(); + + bool changed = false; + BlockIterHost[ i ] = 0; + + + Real 
*sArray; + sArray = new Real[ sizeSArray * sizeSArray * sizeSArray ]; + if( sArray == nullptr ) + std::cout << "Error while allocating memory for sArray." << std::endl; + + for( int k = 0; k < sizeSArray; k++ ) + for( int l = 0; l < sizeSArray; l++ ) + for( int m = 0; m < sizeSArray; m++ ){ + sArray[ m * sizeSArray * sizeSArray + k * sizeSArray + l ] = std::numeric_limits< Real >::max(); + } + + + for( int thrk = 0; thrk < numThreadsPerBlock; thrk++ ) + for( int thrj = 0; thrj < numThreadsPerBlock; thrj++ ) + { + if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX -1 + thrk*dimX*dimY ]; + + if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy *numThreadsPerBlock*dimX+ blIdx*numThreadsPerBlock + numThreadsPerBlock + thrj * dimX + thrk*dimX*dimY ]; + + if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX + thrj + thrk*dimX*dimY ]; + + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + (blIdy+1) * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj + thrk*dimX*dimY ]; + + if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX * dimY + thrj * dimX + thrk ]; + + if( dimZ > (blIdz+1) * 
numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = + aux[ (blIdz+1)*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX + thrk ]; + } + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + sArray[(m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1] = + aux[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ]; + } + } + } + /*string s; + int numWhile = 0; + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + //printf("In with point m = %d, k = %d, l = %d\n", m, k, l); + changed = this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz) || changed; + + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s ); + */ + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray 
+ (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + if( changed ){ + BlockIterHost[ i ] = 1; + } + + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) { + for( int m = 0; m < numThreadsPerBlock; m++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + helpFunc[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] = + sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + //std::cout << helpFunc[ m*dimX*dimY + k*dimX + l ] << " "; + } + } + //std::cout << std::endl; + } + //std::cout << std::endl; + } + //helpFunc.save( "helpF.tnl"); + delete []sArray; + } + } +} +template< typename Real, + typename Device, + typename Index > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) +{ + int* BlockIterPom; + BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ]; + + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterPom[ i ] = 0; + + int m=0, l=0, k=0; + l = i/( numBlockX * numBlockY ); + k = (i-l*numBlockX * numBlockY )/(numBlockX ); + m = 
(i-l*numBlockX * numBlockY )%( numBlockX ); + + if( m > 0 && BlockIterHost[ i - 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( l > 0 && BlockIterHost[ i - numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + }else if( l < numBlockZ-1 && BlockIterHost[ i + numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + } + } + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterHost[ i ] = BlockIterPom[ i ]; + } +} + template< typename Real, typename Device, @@ -619,8 +984,8 @@ initInterface( const MeshFunctionPointer& _input, { cell.refresh(); output[ cell.getIndex() ] = - input( cell ) > 0 ? std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); + input( cell ) > 0 ? 10://std::numeric_limits< RealType >::max() : + -10;//- std::numeric_limits< RealType >::max(); interfaceMap[ cell.getIndex() ] = false; } @@ -967,6 +1332,82 @@ updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real return false; } +template< typename Real, + typename Device, + typename Index > +template< int sizeSArray > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateCell3D( volatile Real *sArray, int thri, int thrj, int thrk, + const Real hx, const Real hy, const Real hz, const Real v ) +{ + const RealType value = sArray[thrk *sizeSArray * sizeSArray + thrj * sizeSArray + thri]; + + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + c = TNL::argAbsMin( sArray[ (thrk+1)* sizeSArray*sizeSArray + thrj * sizeSArray + thri ], + sArray[ (thrk-1) * sizeSArray *sizeSArray + thrj* sizeSArray + thri ] ); + + b = TNL::argAbsMin( sArray[ thrk* sizeSArray*sizeSArray + (thrj+1) * sizeSArray + thri ], + sArray[ thrk* sizeSArray * sizeSArray 
+ (thrj-1)* sizeSArray +thri ] ); + + a = TNL::argAbsMin( sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri+1 ], + sArray[ thrk* sizeSArray * sizeSArray + thrj* sizeSArray +thri-1 ] ); + + /*if( thrk == 8 ) + printf("Calculating a = %f, b = %f, c = %f\n" , a, b, c );*/ + + if( fabs( a ) == 10&& //std::numeric_limits< RealType >::max() && + fabs( b ) == 10&&//std::numeric_limits< RealType >::max() && + fabs( c ) == 10)//std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + + sortMinims( pom ); + + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( 
value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + } + + return false; +} #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -1215,78 +1656,4 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) else return false; } - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool -tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: -updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, - const Real hx, const Real hy, const Real hz, const Real v ) -{ - const RealType value = sArray[thrk][thrj][thri]; - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], - sArray[ thrk-1 ][ thrj ][ thri ] ); - - b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], - sArray[ thrk ][ thrj-1 ][ thri ] ); - - a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], - sArray[ thrk ][ thrj ][ thri-1 ] ); - - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - - sortMinims( pom ); - - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 
] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - } - - return false; -} #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index 60c690e06..57b1886e8 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -1,9 +1,9 @@ /*************************************************************************** - FastSweepingMethod.h - description - ------------------- - begin : Jul 14, 2016 - copyright : (C) 2017 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz + FastSweepingMethod.h - description + ------------------- + begin : Jul 14, 2016 + copyright : (C) 2017 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ @@ -17,132 +17,134 @@ template< typename Mesh, - typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > + typename Anisotropy = Functions::Analytic::Constant< 
Mesh::getMeshDimension(), typename Mesh::RealType > > class FastSweepingMethod { }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); - - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); + + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); - - public: - - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - using typename BaseType::ArrayContainer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); + + public: + + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); - - public: - - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); + + public: + + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index fa2716897..d5ce1efe1 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -15,9 +15,12 @@ #include "tnlFastSweepingMethod.h" #include -#include +#include + + +#include #include #include @@ -80,16 +83,48 @@ solve( const MeshPointer& mesh, MeshFunctionType aux = *auxPtr; +//#ifdef HAVE_MPI + bool a = Communicators::MpiCommunicator::IsInitialized(); + if( a ) + printf("Je Init\n"); + else + printf("Neni Init\n"); +//#endif while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 16; + int numThreadsPerBlock = -1; + + numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0)); + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + if( numThreadsPerBlock <= 16 ) + numThreadsPerBlock = 16; + else if(numThreadsPerBlock <= 32 ) + numThreadsPerBlock = 32; + else if(numThreadsPerBlock <= 64 ) + numThreadsPerBlock = 64; + else if(numThreadsPerBlock <= 128 ) + numThreadsPerBlock = 128; + else if(numThreadsPerBlock <= 256 ) + numThreadsPerBlock = 256; + else if(numThreadsPerBlock <= 512 ) + numThreadsPerBlock = 512; + else + numThreadsPerBlock = 1024; + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + + if( numThreadsPerBlock == -1 ){ + printf("Fail in setting numThreadsPerBlock.\n"); + break; + } + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 
1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; /*Real **sArray = new Real*[numBlocksX*numBlocksY]; @@ -115,13 +150,29 @@ solve( const MeshPointer& mesh, } std::cout<template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + switch ( numThreadsPerBlock ){ + case 16: + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 32: + this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 64: + this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 128: + this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 256: + this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 512: + this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + default: + this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + } + //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ @@ -131,14 +182,14 @@ solve( const MeshPointer& mesh, } } numWhile++; - std::cout <<"numWhile = "<< numWhile < BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 
1:0); @@ -408,6 +459,7 @@ solve( const MeshPointer& mesh, } iteration++; } + //#endif aux.save("aux-final.tnl"); } @@ -527,7 +579,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /** FOR FIM METHOD */ - + if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] ) { __syncthreads(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 65aba5bf5..5af33cf29 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -64,9 +64,6 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); -#ifdef HAVE_CUDA - cudaDeviceSynchronize(); -#endif auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); @@ -78,170 +75,259 @@ solve( const MeshPointer& mesh, { if( std::is_same< DeviceType, Devices::Host >::value ) { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); + int numThreadsPerBlock = 64; - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 
1:0); + int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksZ = mesh->getDimensions().z() / numThreadsPerBlock + (mesh->getDimensions().z() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); + /*Real **sArray = new Real*[numBlocksX*numBlocksY]; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + sArray[ i ] = new Real [ (numThreadsPerBlock + 2)*(numThreadsPerBlock + 2)];*/ - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); + ArrayContainer BlockIterHost; + BlockIterHost.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterHost.setValue( 1 ); + int IsCalculationDone = 1; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + MeshFunctionPointer helpFunc( mesh ); + MeshFunctionPointer helpFunc1( mesh ); + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + //std::cout<< "Size = " << BlockIterHost.getSize() << std::endl; + /*for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + + //Reduction + for( int i = 0; i < BlockIterHost.getSize(); i++ ){ + if( IsCalculationDone == 0 ){ + IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; + //break; } } - } - //aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); + numWhile++; + std::cout <<"numWhile = "<< numWhile <-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; } + std::cout << std::endl; } + std::cout << std::endl;*/ + + this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY, numBlocksZ ); + + /*for( int k = 0; k < numBlocksZ; k++ ){ + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; + } + std::cout << std::endl; + }*/ + + /*for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << "BlockIterHost = "<< j*numBlocksX + i<< " ," << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + + //std::cout<getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + }*/ } if( std::is_same< DeviceType, Devices::Cuda >::value ) { @@ -389,7 +475,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { __syncthreads(); - __shared__ volatile bool changed[ (sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)]; + __shared__ volatile bool changed[ 8*8*8/*(sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)*/]; changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 && thrk == 0 ) @@ -402,6 +488,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 1 && thri == 1 && thrk == 1 ) { + //printf( "We are in the calculation. 
Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); hz = mesh.getSpaceSteps().z(); @@ -410,8 +497,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< dimZ = mesh.getDimensions().z(); BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0; } - __shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray]; - sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max(); + __shared__ volatile Real sArray[ 10*10*10/*sizeSArray * sizeSArray * sizeSArray*/ ]; + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = std::numeric_limits< Real >::max(); //filling sArray edges int numOfBlockx; @@ -426,6 +513,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< numOfBlockx = gridDim.x; numOfBlocky = gridDim.y; numOfBlockz = gridDim.z; + __syncthreads(); if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*blockDim.x+1; @@ -438,54 +526,55 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thri == 0 ) { if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + 
thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + 0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 3 ) { if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 4 ) { if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + 
thrj * dimX + thrk ]; else - sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[0 * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( thri == 5 ) { if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; else - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( i < dimX && j < dimY && k < dimZ ) { - sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; + sArray[(thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; } __syncthreads(); + while( changed[ 0 ] ) { __syncthreads(); @@ -493,11 +582,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< changed[ currentIndex ] = false; //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) + if( i < dimX && j < dimY && k < dimZ ) { - if( ! interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) + if( ! 
interfaceMap[ k*dimX*dimY + j * dimX + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); + changed[ currentIndex ] = ptr.updateCell3D< sizeSArray >( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); } } __syncthreads(); @@ -535,7 +624,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } } __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + if( currentIndex < 32 ) { if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; @@ -548,7 +637,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) { - for(int m = 0; m < 8; m++){ + //for(int m = 0; m < 8; m++){ + int m = 4; for(int n = 0; n<8; n++){ for(int b=0; b<8; b++) printf(" %i ", changed[m*64 + n*8 + b]); @@ -556,16 +646,19 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } printf("\n \n"); } - }*/ + //}*/ + if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) { - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + //printf( "Setting block calculation. Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); + BlockIterDevice[ blIdz * gridDim.x * gridDim.y + blIdy * gridDim.x + blIdx ] = 1; } __syncthreads(); } if( i < dimX && j < dimY && k < dimZ ) - helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; + helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thri+1 ]; + } } #endif -- GitLab From bf5e6fd6c9ffaa78e29645493ab3f42a00fbf1f6 Mon Sep 17 00:00:00 2001 From: Fencl Date: Thu, 4 Oct 2018 19:30:14 +0200 Subject: [PATCH 10/20] Chess model implemented in 2D. 
--- .../tnlDirectEikonalMethodsBase.h | 8 +- .../tnlDirectEikonalMethodsBase_impl.h | 12 +- .../tnlFastSweepingMethod2D_impl.h | 212 ++++++++++-------- 3 files changed, 124 insertions(+), 108 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index b981a92a8..eb7cbd2a5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int oddEvenBlock); __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); -template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ); +/*template < typename Real, typename Device, typename Index > +__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 
649a5ad43..cfea6aca0 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; - if ( fabs( tmp ) > 0.01*h ) + if ( fabs( tmp ) > 0.001*h ) return true; else return false; @@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; @@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); sArray[ thrk ][ thrj ][ thri ] = 
argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.01*hx ) + if ( fabs( tmp ) > 0.001*hx ) return true; else return false; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 6703843c1..7e4028fbe 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -26,7 +26,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 100 ) +: maxIterations( 1 ) { } @@ -250,7 +250,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); + //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); @@ -261,7 +261,7 @@ solve( const MeshPointer& mesh, int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); int *dBlock; cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - + int oddEvenBlock = 0; while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) @@ -269,19 +269,30 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), - dAux, - BlockIterDevice ); + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - + TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - aux1<<>>( auxPtr.template modifyData< Device>(), dAux, 0 ); + //aux1<<>>( auxPtr.template modifyData< Device>(), dAux, 0 ); cudaFree( dAux ); cudaFree( BlockIterDevice ); cudaFree( dBlock ); @@ -299,7 +310,7 @@ solve( const MeshPointer& mesh, } #ifdef HAVE_CUDA -template < typename Real, typename Device, typename Index > +/*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) { int i = threadIdx.x + blockDim.x*blockIdx.x; @@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; } -} +}*/ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) { @@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Real *aux, - int *BlockIterDevice ) + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + int *BlockIterDevice, int 
oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; @@ -417,109 +428,114 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } __syncthreads(); - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) - { - if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); - } - - if( thri == 2 ) - { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( thri == 3 ) + if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) { - if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); - } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - } - __syncthreads(); + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) + sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + } + + if( thri == 1 ) + { + if( blIdx != 0 && thrj+1 < ykolik ) + sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + } + + if( thri == 2 ) + { + if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik 
) + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + } + + if( thri == 3 ) + { + if( blIdy != 0 && thrj+1 < xkolik ) + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + - while( changed[ 0 ] ) - { - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + } + __syncthreads(); + + while( changed[ 0 ] ) { - if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) + { + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + } } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y == 1024 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + if( currentIndex < 512 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 512 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + if( currentIndex < 256 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 256 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + if( currentIndex < 128 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; + } } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) + __syncthreads(); + if( blockDim.x*blockDim.y >= 128 ) { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + if( currentIndex < 64 ) + { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + } } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ 
currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + if( changed[ 0 ] && thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; + __syncthreads(); } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - __syncthreads(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) + aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; + } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; } #endif -- GitLab From 970e64480b33f755d2f5b6e441b77d75494ef920 Mon Sep 17 00:00:00 2001 From: Fencl Date: Sun, 7 Oct 2018 12:55:16 +0200 Subject: [PATCH 11/20] FIM method implemented. Neighbours are being found on CPU. 3D parallel method disabled because of Array changes. 
--- .../tnlDirectEikonalMethodsBase.h | 9 +- .../tnlFastSweepingMethod2D_impl.h | 199 +++++++++++------- .../tnlFastSweepingMethod3D_impl.h | 4 +- 3 files changed, 134 insertions(+), 78 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index eb7cbd2a5..c92368deb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -113,6 +113,8 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); +template < typename Index > +void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -130,8 +132,11 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock); -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 7e4028fbe..817811c84 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -235,13 +235,6 @@ solve( const MeshPointer& mesh, { // TODO: CUDA code #ifdef HAVE_CUDA - - Real *dAux; - cudaMalloc(&dAux, ( mesh->getDimensions().x() * mesh->getDimensions().y() ) * sizeof( Real ) ); - - - - const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); @@ -250,18 +243,30 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); + TNL::Containers::Array< int, Devices::Host, IndexType > BlockIter; + BlockIter.setSize( numBlocksX * numBlocksY ); + BlockIter.setValue( 0 ); + /*int* BlockIter = (int*)malloc( ( numBlocksX * numBlocksY ) * sizeof( int ) ); + for( int i = 0; i < numBlocksX*numBlocksY +1; i++) + BlockIter[i] = 1;*/ - //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); - - int *BlockIterDevice; int BlockIterD = 1; + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY ); + BlockIterDevice.setValue( 1 ); + /*int *BlockIterDevice; cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) ); + cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice);*/ + int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); - int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); - int oddEvenBlock = 0; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ + while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) @@ -270,89 +275,132 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); - TNL_CHECK_CUDA_DEVICE; - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + BlockIter = BlockIterDevice; + //cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyDeviceToHost); + GetNeighbours( BlockIter, numBlocksX, numBlocksY ); + + BlockIterDevice = BlockIter; + //cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + + CudaParallelReduc<<< nBlocks, 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - //aux1<<>>( auxPtr.template modifyData< 
Device>(), dAux, 0 ); - cudaFree( dAux ); - cudaFree( BlockIterDevice ); + /*cudaFree( BlockIterDevice ); cudaFree( dBlock ); + delete BlockIter;*/ cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - //aux = *auxPtr; - //interfaceMap = *interfaceMapPtr; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; #endif } iteration++; } aux.save("aux-final.tnl"); } - -#ifdef HAVE_CUDA -/*template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) +template < typename Index > +void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.template getMesh< Devices::Cuda >(); - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 1 ) - { - dAux[ j*mesh.getDimensions().x() + i ] = aux[ j*mesh.getDimensions().x() + i ]; - } - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && a == 0 ) - { - aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; - } - -}*/ + TNL::Containers::Array< int, Devices::Host, Index > BlockIterPom; + BlockIterPom.setSize( numBlockX * numBlockY ); + BlockIterPom.setValue( 0 ); + /*int* BlockIterPom; + BlockIterPom = new int[numBlockX * numBlockY];*/ + /*for(int i = 0; i < numBlockX * numBlockY; i++) + BlockIterPom[ i ] = 0;*/ + for(int i = 0; i < numBlockX * numBlockY; i++) + { + + if( BlockIter[ i ] ) + { + // i = k*numBlockY + m; + int m=0, k=0; + m = i%numBlockY; + k = i/numBlockY; + if( k > 0 && numBlockY > 1 ) + BlockIterPom[i - numBlockX] = 1; + if( k < numBlockY-1 && numBlockY > 1 ) + BlockIterPom[i + numBlockX] = 1; + + if( m >= 0 && m < numBlockX - 1 && numBlockX > 1 ) + BlockIterPom[ i+1 ] = 1; + if( m <= numBlockX -1 && m > 0 && numBlockX > 1 ) + BlockIterPom[ i-1 ] = 1; 
+ } + } + for(int i = 0; i < numBlockX * numBlockY; i++ ){ +/// if( !BlockIter[ i ] ) + BlockIter[ i ] = BlockIterPom[ i ]; +/// else +/// BlockIter[ i ] = 0; + } + /*for( int i = numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIter[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + //delete[] BlockIterPom; +} -__global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) +#ifdef HAVE_CUDA +template < typename Index > +__global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { int i = threadIdx.x; int blId = blockIdx.x; + /*if ( i == 0 && blId == 0 ){ + printf( "nBlocks = %d \n", nBlocks ); + for( int j = nBlocks-1; j > -1 ; j--){ + printf( "cislo = %d \n", BlockIterDevice[ j ] ); + } + }*/ __shared__ volatile int sArray[ 512 ]; - sArray[ i ] = false; - if(blId * 1024 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 1024 + i ]; + sArray[ i ] = 0; + if( blId * 512 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + __syncthreads(); - if (blockDim.x * blockDim.y == 1024) { + if (blockDim.x == 1024) { if (i < 512) - sArray[ i ] += sArray[ i ]; + sArray[ i ] += sArray[ i + 512 ]; } __syncthreads(); - if (blockDim.x * blockDim.y >= 512) { + if (blockDim.x >= 512) { if (i < 256) { - sArray[ i ] += sArray[ i ]; + sArray[ i ] += sArray[ i + 256 ]; } } - if (blockDim.x * blockDim.y >= 256) { + __syncthreads(); + if (blockDim.x >= 256) { if (i < 128) { sArray[ i ] += sArray[ i + 128 ]; } } __syncthreads(); - if (blockDim.x * blockDim.y >= 128) { + if (blockDim.x >= 128) { if (i < 64) { sArray[ i ] += sArray[ i + 64 ]; } @@ -360,12 +408,12 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock __syncthreads(); if (i < 32 ) { - if( blockDim.x * blockDim.y >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x * 
blockDim.y >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x * blockDim.y >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x * blockDim.y >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x * blockDim.y >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x * blockDim.y >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; } if( i == 0 ) @@ -378,14 +426,15 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - int *BlockIterDevice, int oddEvenBlock ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; int currentIndex = thrj * blockDim.x + thri; - + if( BlockIterDevice[ blIdy * gridDim.x + blIdx] ) + { //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; __shared__ volatile bool changed[16*16]; changed[ currentIndex ] = false; @@ -424,13 +473,13 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*blockDim.y+1; - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; + //BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; } __syncthreads(); - if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) - { - + if(thri == 0 && thrj == 0 ) + BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 
0; + if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) @@ -528,14 +577,16 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; } - if( changed[ 0 ] && thri == 0 && thrj == 0 ) + if( changed[ 0 ] && thri == 0 && thrj == 0 ){ BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; + } __syncthreads(); } - + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; - } + /*if( thri == 0 && thrj == 0 ) + printf( "Block ID = %d, value = %d \n", (blIdy * numOfBlockx + blIdx), BlockIterDevice[ blIdy * numOfBlockx + blIdx ] );*/ } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index b024979cc..8c85745cd 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -272,8 +272,8 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + //CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); + //CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); -- GitLab From 
08ec37be1ec80e299cea295a057a9515e9f04896 Mon Sep 17 00:00:00 2001 From: Fencl Date: Mon, 22 Oct 2018 21:13:54 +0200 Subject: [PATCH 12/20] FIM method is now faster than chess method but some random error occurs. --- .../tnlDirectEikonalMethodsBase.h | 17 +- .../tnlDirectEikonalMethodsBase_impl.h | 193 ++++ .../hamilton-jacobi/tnlFastSweepingMethod.h | 3 +- .../tnlFastSweepingMethod2D_impl.h | 871 ++++++++---------- .../tnlFastSweepingMethod3D_impl.h | 33 +- 5 files changed, 629 insertions(+), 488 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index c92368deb..08ed947ed 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -61,8 +61,9 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > typedef Index IndexType; typedef Functions::MeshFunction< MeshType > MeshFunctionType; typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; void initInterface( const MeshFunctionPointer& input, MeshFunctionPointer& output, @@ -76,6 +77,11 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ bool updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); + void updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + ArrayContainer BlockIterHost, int numThreadsPerBlock ); + + void getNeighbours( ArrayContainer BlockIterHost, 
int numBlockX, int numBlockY ); }; template< typename Real, @@ -132,14 +138,15 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); -/*template < typename Real, typename Device, typename Index > -__global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ +template < typename Index > +__global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, @@ -155,7 +162,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - int *BlockIterDevice ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h 
b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index cfea6aca0..1f9fc5eeb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -89,6 +89,199 @@ initInterface( const MeshFunctionPointer& _input, } } +template< typename Real, + typename Device, + typename Index > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + ArrayContainer BlockIterHost, int numThreadsPerBlock ) +{ + for( int i = 0; i < BlockIterHost.getSize(); i++ ) + { + if( BlockIterHost[ i ] ) + { + MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 
1:0); + int xkolik = numThreadsPerBlock + 1; + int ykolik = numThreadsPerBlock + 1; + + int blIdx = i%numOfBlockx; + int blIdy = i/numOfBlocky; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*numThreadsPerBlock+1; + + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*numThreadsPerBlock+1; + + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; + changed[ 0 ] = 1;*/ + Real hx = mesh.getSpaceSteps().x(); + Real hy = mesh.getSpaceSteps().y(); + + Real changed1[ 16*16 ]; + /*Real changed2[ 16*16 ]; + Real changed3[ 16*16 ]; + Real changed4[ 16*16 ];*/ + Real sArray[18][18]; + + for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) + for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) + sArray[thrj][thri] = std::numeric_limits< Real >::max(); + + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; + + for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) + { + if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) + sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + + + if( blIdx != 0 && thrj+1 < ykolik ) + sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) + sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + + if( blIdy != 0 && thrj+1 < xkolik ) + sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = 0; l < numThreadsPerBlock; l++ ) + sArray[k+1][l+1] = aux[ blIdy * 
numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = 0; l < numThreadsPerBlock; l++ ){ + changed1[ k*numThreadsPerBlock + l ] = 0; + /*changed2[ k*numThreadsPerBlock + l ] = 0; + changed3[ k*numThreadsPerBlock + l ] = 0; + changed4[ k*numThreadsPerBlock + l ] = 0;*/ + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + { + if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) + { + /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + } + } + } + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; + /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; + changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; + changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ + } + + if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && + (!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) + aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; + //std::cout<< sArray[k+1][l+1]; + } + //std::cout< +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) +{ + int* BlockIterPom; + BlockIterPom = new int [numBlockX * numBlockY]; + + for(int i = 0; i < numBlockX * numBlockY; i++) + { + BlockIterPom[ i ] = 0; + if( BlockIterHost[ i ] ) + { + // i = k*numBlockY + m; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( k > 0 ) + BlockIterPom[i - numBlockX] = 1; + if( k < numBlockY - 1 ) + BlockIterPom[i + numBlockX] = 1; + + if( m < numBlockX - 1 ) + BlockIterPom[ i+1 ] = 1; + if( m > 0 ) + BlockIterPom[ i-1 ] = 1; + } + } + for(int i = 0; i < numBlockX * numBlockY; i++ ) + //if( !BlockIter[ i ] ) + BlockIterHost[ i ] = BlockIterPom[ i ]; + /*else + BlockIter[ i ] = 0;*/ + /*for( int i = 
numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIterHost[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + delete[] BlockIterPom; +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index fa8077427..60c690e06 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -88,7 +88,8 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > using typename BaseType::InterfaceMapType; using typename BaseType::MeshFunctionType; using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; FastSweepingMethod(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 817811c84..c6cc575d1 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -21,403 +21,348 @@ #include template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > const Index& FastSweepingMethod< Meshes::Grid< 2, Real, Device, 
Index >, Anisotropy >:: getMaxIterations() const { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: setMaxIterations( const IndexType& maxIterations ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ) -{ - /*MeshFunctionType v; - v.setMesh(mesh); - double A[320][320]; - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - A[i][j] = 0; - - std::ifstream file("/home/maty/Downloads/mapa2.txt"); - - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - file >> A[i][j]; - file.close(); - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - v[i*320 + j] = A[i][j]; - v.save("mapa.tnl");*/ - - - MeshFunctionPointer auxPtr; - InterfaceMapPointer interfaceMapPtr; - auxPtr->setMesh( mesh ); - interfaceMapPtr->setMesh( mesh ); - std::cout << "Initiating the interface cells ..." << std::endl; - BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ) +{ + MeshFunctionPointer auxPtr; + InterfaceMapPointer interfaceMapPtr; + auxPtr->setMesh( mesh ); + interfaceMapPtr->setMesh( mesh ); + std::cout << "Initiating the interface cells ..." 
<< std::endl; + BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + + auxPtr->save( "aux-ini.tnl" ); + + typename MeshType::Cell cell( *mesh ); + + IndexType iteration( 0 ); + InterfaceMapType interfaceMap = *interfaceMapPtr; + MeshFunctionType aux = *auxPtr; + + + + + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) + { + int numThreadsPerBlock = 16; + + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + + + ArrayContainer BlockIterHost; + BlockIterHost.setSize( numBlocksX * numBlocksY ); + BlockIterHost.setValue( 1 ); + /*for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<updateBlocks( interfaceMap, aux, BlockIterHost, numThreadsPerBlock); - auxPtr->save( "aux-ini.tnl" ); - - typename MeshType::Cell cell( *mesh ); - - IndexType iteration( 0 ); - InterfaceMapType interfaceMap = *interfaceMapPtr; - MeshFunctionType aux = *auxPtr; - while( iteration < this->maxIterations ) - { - if( std::is_same< DeviceType, Devices::Host >::value ) - { - for( cell.getCoordinates().y() = 0; + this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY ); + + //Reduction + for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + //std::cout<< BlockIterHost[ l*numBlocksX + k ]; + BlockIterHost[ 0 ] = BlockIterHost[ 0 ] || BlockIterHost[ l*numBlocksX + k ]; + } + //std::cout<getDimensions().y(); cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().y() = 0; + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh->getDimensions().y(); cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-2.tnl" ); - - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-2.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; cell.getCoordinates().y() >= 0 ; cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-3.tnl" ); - - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; cell.getCoordinates().y() >= 0; cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-4.tnl" ); - - /*for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().y(); - cell.getCoordinates().x()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().x(); - cell.getCoordinates().y()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - - aux.save( "aux-5.tnl" ); - - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().y(); - cell.getCoordinates().x()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().x() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - aux.save( "aux-6.tnl" ); - - for( cell.getCoordinates().x() = mesh->getDimensions().y() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().x(); - cell.getCoordinates().y()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().x() = mesh->getDimensions().y() - 1; - cell.getCoordinates().x() >= 0; - cell.getCoordinates().x()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().x() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - }*/ - } - if( std::is_same< DeviceType, Devices::Cuda >::value ) { - // TODO: CUDA code + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + }*/ + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + // TODO: CUDA code #ifdef HAVE_CUDA - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY ); - - tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - - TNL::Containers::Array< int, Devices::Host, IndexType > BlockIter; - BlockIter.setSize( numBlocksX * numBlocksY ); - BlockIter.setValue( 0 ); - /*int* BlockIter = (int*)malloc( ( numBlocksX * numBlocksY ) * sizeof( int ) ); - for( int i = 0; i < numBlocksX*numBlocksY +1; i++) - BlockIter[i] = 1;*/ - - int BlockIterD = 1; - - TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; - BlockIterDevice.setSize( numBlocksX * numBlocksY ); - BlockIterDevice.setValue( 1 ); - /*int *BlockIterDevice; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) ); - cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( 
int ), cudaMemcpyHostToDevice);*/ - - int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); - - TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); - dBlock.setValue( 0 ); - /*int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ - - while( BlockIterD ) - { - /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ i ] = false;*/ - - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - BlockIter = BlockIterDevice; - //cudaMemcpy(BlockIter, BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyDeviceToHost); - GetNeighbours( BlockIter, numBlocksX, numBlocksY ); - - BlockIterDevice = BlockIter; - //cudaMemcpy(BlockIterDevice, BlockIter, ( numBlocksX * numBlocksY ) * sizeof( int ), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - CudaParallelReduc<<< nBlocks, 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - - /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - - } - /*cudaFree( BlockIterDevice ); - cudaFree( dBlock ); - delete BlockIter;*/ - cudaDeviceSynchronize(); - - TNL_CHECK_CUDA_DEVICE; - - aux = *auxPtr; - interfaceMap = *interfaceMapPtr; -#endif + TNL_CHECK_CUDA_DEVICE; + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + dim3 blockSize( 
cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY ); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; + + int BlockIterD = 1; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY ); + BlockIterDevice.setValue( 1 ); + TNL_CHECK_CUDA_DEVICE; + int ne = 0; + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, ne); + TNL_CHECK_CUDA_DEVICE; + + /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 );*/ + /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ + /*int *BlockIterDevice; + cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ + int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); + //std::cout << "nBlocksNeigh = " << nBlocksNeigh << std::endl; + //free( BlockIter ); + /*int *BlockIterPom; + cudaMalloc((void**) &BlockIterPom, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ + + int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 
1:0); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + TNL_CHECK_CUDA_DEVICE; + /*int *dBlock; + cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ + //int pocIter = 0; + while( BlockIterD ) + { + /*BlockIterPom1 = BlockIterDevice; + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << BlockIterPom1[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + BlockIterDevice, 1); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + /*int poc = 0; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + if( BlockIterPom1[ i ] ) + poc = poc+1; + std::cout << "pocet bloku, ktere se pocitali = " << poc << std::endl;*/ + + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, /*BlockIterPom,*/ numBlocksX, numBlocksY ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + TNL_CHECK_CUDA_DEVICE; + + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ + //pocIter ++; } - iteration++; - } - aux.save("aux-final.tnl"); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout<< pocIter << std::endl; + + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif + } + iteration++; + } + aux.save("aux-final.tnl"); } + +#ifdef HAVE_CUDA template < typename Index > -void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ) +__global__ 
void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) { - TNL::Containers::Array< int, Devices::Host, Index > BlockIterPom; - BlockIterPom.setSize( numBlockX * numBlockY ); - BlockIterPom.setValue( 0 ); - /*int* BlockIterPom; - BlockIterPom = new int[numBlockX * numBlockY];*/ - /*for(int i = 0; i < numBlockX * numBlockY; i++) - BlockIterPom[ i ] = 0;*/ - for(int i = 0; i < numBlockX * numBlockY; i++) + int i = blockIdx.x * 1024 + threadIdx.x; + + if( i < numBlockX * numBlockY ) { - - if( BlockIter[ i ] ) - { - // i = k*numBlockY + m; - int m=0, k=0; - m = i%numBlockY; - k = i/numBlockY; - if( k > 0 && numBlockY > 1 ) - BlockIterPom[i - numBlockX] = 1; - if( k < numBlockY-1 && numBlockY > 1 ) - BlockIterPom[i + numBlockX] = 1; + int pom = 0;//BlockIterPom[ i ] = 0; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( m > 0 ) + if( BlockIterDevice[ i - 1 ] ) + pom = 1;//BlockIterPom[ i ] = 1; + if( m < numBlockX -1 && pom == 0 ) + if( BlockIterDevice[ i + 1 ] ) + pom = 1;//BlockIterPom[ i ] = 1; + if( k > 0 && pom == 0 ) + if( BlockIterDevice[ i - numBlockX ] ) + pom = 1;// BlockIterPom[ i ] = 1; + if( k < numBlockY -1 && pom == 0 ) + if( BlockIterDevice[ i + numBlockX ] ) + pom = 1;//BlockIterPom[ i ] = 1; + - if( m >= 0 && m < numBlockX - 1 && numBlockX > 1 ) - BlockIterPom[ i+1 ] = 1; - if( m <= numBlockX -1 && m > 0 && numBlockX > 1 ) - BlockIterPom[ i-1 ] = 1; - } - } - for(int i = 0; i < numBlockX * numBlockY; i++ ){ -/// if( !BlockIter[ i ] ) - BlockIter[ i ] = BlockIterPom[ i ]; -/// else -/// BlockIter[ i ] = 0; - } - /*for( int i = numBlockX-1; i > -1; i-- ) - { - for( int j = 0; j< numBlockY; j++ ) - std::cout << BlockIter[ i*numBlockY + j ]; - std::cout << std::endl; + + BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; } - std::cout << std::endl;*/ - //delete[] BlockIterPom; } -#ifdef HAVE_CUDA template < typename 
Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { - int i = threadIdx.x; - int blId = blockIdx.x; - /*if ( i == 0 && blId == 0 ){ - printf( "nBlocks = %d \n", nBlocks ); - for( int j = nBlocks-1; j > -1 ; j--){ - printf( "cislo = %d \n", BlockIterDevice[ j ] ); - } - }*/ - __shared__ volatile int sArray[ 512 ]; - sArray[ i ] = 0; - if( blId * 512 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; - __syncthreads(); - - if (blockDim.x == 1024) { - if (i < 512) - sArray[ i ] += sArray[ i + 512 ]; - } - __syncthreads(); - if (blockDim.x >= 512) { - if (i < 256) { - sArray[ i ] += sArray[ i + 256 ]; - } - } - __syncthreads(); - if (blockDim.x >= 256) { - if (i < 128) { - sArray[ i ] += sArray[ i + 128 ]; - } + int i = threadIdx.x; + int blId = blockIdx.x; + __shared__ volatile int sArray[ 512 ]; + sArray[ i ] = 0; + if(blId * 512 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + __syncthreads(); + if (blockDim.x == 1024) { + if (i < 512) + sArray[ i ] += sArray[ i + 512 ]; + } + __syncthreads(); + if (blockDim.x >= 512) { + if (i < 256) { + sArray[ i ] += sArray[ i + 256 ]; } - __syncthreads(); - if (blockDim.x >= 128) { - if (i < 64) { - sArray[ i ] += sArray[ i + 64 ]; - } + } + if (blockDim.x >= 256) { + if (i < 128) { + sArray[ i ] += sArray[ i + 128 ]; } - __syncthreads(); - if (i < 32 ) - { - if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + } + __syncthreads(); + if (blockDim.x >= 128) { + if (i < 64) { + sArray[ i ] += sArray[ i + 64 ]; } - - if( i == 0 ) - dBlock[ blId ] = sArray[ 0 ]; + } + 
__syncthreads(); + if (i < 32 ) + { + if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + } + + if( i == 0 ) + dBlock[ blId ] = sArray[ 0 ]; } @@ -426,10 +371,40 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne ) { - int thri = threadIdx.x; int thrj = threadIdx.y; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; + int thri = threadIdx.x; int thrj = threadIdx.y; + int blIdx = blockIdx.x; int blIdy = blockIdx.y; + int grIdx = gridDim.x; + + if( BlockIterDevice[ blIdy * grIdx + blIdx] ) + { + + const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + __shared__ volatile int numOfBlockx; + __shared__ volatile int numOfBlocky; + __shared__ int xkolik; + __shared__ int ykolik; + __shared__ volatile int NE; + if( thri == 0 && thrj == 0 ) + { + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); + numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*blockDim.x+1; + + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*blockDim.y+1; + BlockIterDevice[ blIdy * grIdx + blIdx ] = 0; + NE = ne; + } + __syncthreads(); + int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; int currentIndex = thrj * blockDim.x + thri; @@ -438,17 +413,15 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; __shared__ volatile bool changed[16*16]; changed[ currentIndex ] = false; - if( thrj == 0 && thri == 0 ) - changed[ 0 ] = true; + changed[ 0 ] = true; - const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ Real hx; __shared__ Real hy; if( thrj == 1 && thri == 1 ) { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); } //__shared__ volatile Real sArray[ blockDim.y+2 ][ blockDim.x+2 ]; @@ -456,137 +429,89 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< sArray[thrj][thri] = std::numeric_limits< Real >::max(); //filling sArray edges - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ int xkolik; - __shared__ int ykolik; - if( thri == 0 && thrj == 0 ) + if( thri == 0 ) + { + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && NE == 1 ) + sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + else + sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + } + + if( thri == 1 ) { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + if( blIdx != 0 && thrj+1 < ykolik && NE == 1 ) + sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + else + sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + } - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - //BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; + if( thri == 2 ) + { + if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik && NE == 1 ) + sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); } - __syncthreads(); - if(thri == 0 && thrj == 0 ) - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 0; - - if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - } - - if( thri == 1 ) { - if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); } - - if( thri == 2 ) + } + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y == 1024 ) + { + if( currentIndex < 512 ) { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; } - - if( thri == 3 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 512 ) + { + if( currentIndex < 256 ) { - if( blIdy != 0 && thrj+1 < xkolik ) - 
sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; } - - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - } - __syncthreads(); - - while( changed[ 0 ] ) + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 256 ) + { + if( currentIndex < 128 ) { - __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - if( ! interfaceMap[ j * mesh.getDimensions().x() + i ] ) - { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); - } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y == 1024 ) - { - if( currentIndex < 512 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 512 ) - { - if( currentIndex < 256 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 256 ) - { - if( currentIndex < 128 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y >= 128 ) - { - if( currentIndex < 64 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; - } - } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU - { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 
]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; - } - if( changed[ 0 ] && thri == 0 && thrj == 0 ){ - BlockIterDevice[ blIdy * numOfBlockx + blIdx ] = 1; - } - __syncthreads(); + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) - aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; + } + __syncthreads(); + if( blockDim.x*blockDim.y >= 128 ) + { + if( currentIndex < 64 ) + { + if( thri == 3 ) + { + if( blIdy != 0 && thrj+1 < xkolik && NE == 1 ) + sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; + else + sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + } + + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; } - /*if( thri == 0 && thrj == 0 ) - printf( "Block ID = %d, value = %d \n", (blIdy * numOfBlockx + blIdx), BlockIterDevice[ blIdy * numOfBlockx + blIdx ] );*/ + __syncthreads(); + + while( changed[ 0 ] ) + { + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) } #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 8c85745cd..4daf9fc92 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -258,13 +258,21 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; - int *BlockIterDevice; + int BlockIterD = 1; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) ); + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterDevice.setValue( 1 ); + /*int *BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 
1:0); - int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); + + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ while( BlockIterD ) { @@ -272,17 +280,24 @@ solve( const MeshPointer& mesh, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice ); - //CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - //CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } - cudaFree( BlockIterDevice ); - cudaFree( dBlock ); + //cudaFree( BlockIterDevice ); + //cudaFree( dBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; @@ -302,7 +317,7 @@ template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - int *BlockIterDevice ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; -- GitLab From d40a55e3f1e9795f5ca3ef669fe980b8effeb1e3 Mon Sep 17 00:00:00 2001 
From: Fencl Date: Tue, 30 Oct 2018 18:38:41 +0100 Subject: [PATCH 13/20] FIM method implemented for GPU and FIM-FSM implemented for CPU (parallel). --- .../tnlDirectEikonalMethodsBase.h | 18 +- .../tnlDirectEikonalMethodsBase_impl.h | 2045 +++++++++-------- .../tnlFastSweepingMethod2D_impl.h | 620 +++-- 3 files changed, 1440 insertions(+), 1243 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 08ed947ed..cbb1a1ff6 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -74,12 +74,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > const MeshEntity& cell, const RealType velocity = 1.0 ); - __cuda_callable__ bool updateCell( volatile Real sArray[18][18], + template< int sizeSArray > + __cuda_callable__ bool updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); + + template< int sizeSArray > void updateBlocks( InterfaceMapType interfaceMap, MeshFunctionType aux, - ArrayContainer BlockIterHost, int numThreadsPerBlock ); + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; @@ -119,9 +123,6 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); -template < typename Index > -void GetNeighbours( TNL::Containers::Array< int, Devices::Host, Index > BlockIter, int numBlockX, int numBlockY ); - #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 
1, Real, Device, Index > >& input, @@ -134,11 +135,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, bool *BlockIterDevice ); -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne = 1 ); + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 1f9fc5eeb..95971c9b8 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -1,4 +1,4 @@ - /* +/* * File: tnlDirectEikonalMethodsBase_impl.h * Author: oberhuber * @@ -13,233 +13,259 @@ #include "tnlFastSweepingMethod.h" template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& 
_interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - if( std::is_same< Device, Devices::Cuda >::value ) - { + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize ); - dim3 gridSize( numBlocksX ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize ); + dim3 gridSize( numBlocksX ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - const MeshType& mesh = _input->getMesh(); - typedef typename MeshType::Cell Cell; - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - Cell cell( mesh ); - for( cell.getCoordinates().x() = 0; + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshType& mesh = _input->getMesh(); + typedef typename MeshType::Cell Cell; + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + Cell cell( mesh ); + for( cell.getCoordinates().x() = 0; 
cell.getCoordinates().x() < mesh.getDimensions().x(); cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : - -std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - - const RealType& h = mesh.getSpaceSteps().x(); - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x() - 1; - cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : + -std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + + const RealType& h = mesh.getSpaceSteps().x(); + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x() - 1; + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + const auto& neighbors = cell.getNeighborEntities(); + Real pom = 0; + //const IndexType& c = cell.getIndex(); + const IndexType e = neighbors.template getEntityIndex< 1 >(); + if( c * input[ e ] <= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - const auto& neighbors = cell.getNeighborEntities(); - Real pom = 0; - //const IndexType& c = cell.getIndex(); - const IndexType e = neighbors.template getEntityIndex< 1 >(); - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ e ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - - pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; - - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - } + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + + pom = pom - TNL::sign( c )*h; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) + output[ e ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; } + } } + } } template< typename Real, - typename Device, - typename Index > -void + typename Device, + typename Index > +template< int sizeSArray > +void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateBlocks( InterfaceMapType interfaceMap, - MeshFunctionType aux, - ArrayContainer BlockIterHost, int numThreadsPerBlock ) + MeshFunctionType aux, + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) { +#pragma omp parallel for schedule( dynamic ) for( int i = 0; i < BlockIterHost.getSize(); i++ ) { if( BlockIterHost[ i ] ) { MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); - + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - int numOfBlockx = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); - int numOfBlocky = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 
1:0); + //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; + int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); + //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; int xkolik = numThreadsPerBlock + 1; int ykolik = numThreadsPerBlock + 1; int blIdx = i%numOfBlockx; - int blIdy = i/numOfBlocky; + int blIdy = i/numOfBlockx; + //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*numThreadsPerBlock+1; if( numOfBlocky -1 == blIdy ) ykolik = dimY - (blIdy)*numThreadsPerBlock+1; - - + //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; - changed[ 0 ] = 1;*/ + changed[ 0 ] = 1;*/ Real hx = mesh.getSpaceSteps().x(); Real hy = mesh.getSpaceSteps().y(); - Real changed1[ 16*16 ]; - /*Real changed2[ 16*16 ]; - Real changed3[ 16*16 ]; - Real changed4[ 16*16 ];*/ - Real sArray[18][18]; + bool changed = false; + + + RealType *sArray; + sArray = new Real[ sizeSArray * sizeSArray ]; + if( sArray == nullptr ) + std::cout << "Error while allocating memory for sArray." 
<< std::endl; + + for( int thri = 0; thri < sizeSArray; thri++ ){ + for( int thrj = 0; thrj < sizeSArray; thrj++ ) + sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); + } - for( int thri = 0; thri < numThreadsPerBlock + 2; thri++ ) - for( int thrj = 0; thrj < numThreadsPerBlock + 2; thrj++ ) - sArray[thrj][thri] = std::numeric_limits< Real >::max(); - BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; - + for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) - sArray[thrj+1][xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; - else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); - - + sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + + if( blIdx != 0 && thrj+1 < ykolik ) - sArray[thrj+1][0] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; - else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); - + sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) - sArray[ykolik][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; - else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); - + sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + if( blIdy != 0 && thrj+1 < xkolik ) - sArray[0][thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); + sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; 
} - - for( int k = 0; k < numThreadsPerBlock; k++ ) - for( int l = 0; l < numThreadsPerBlock; l++ ) - sArray[k+1][l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; - - for( int k = 0; k < numThreadsPerBlock; k++ ) + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) + sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + } + bool pom = false; + for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ - changed1[ k*numThreadsPerBlock + l ] = 0; - /*changed2[ k*numThreadsPerBlock + l ] = 0; - changed3[ k*numThreadsPerBlock + l ] = 0; - changed4[ k*numThreadsPerBlock + l ] = 0;*/ - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) - { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ + //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - changed1[ k*numThreadsPerBlock + l ] = this->updateCell( sArray, l+1, k+1, hx,hy); + pom = this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + changed = changed || pom; } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) - for( int l = 0; l < numThreadsPerBlock; l++ ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + } + /*aux.save( "aux-1pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = 0; k < numThreadsPerBlock; k++ ) + for( int l = numThreadsPerBlock-1; l >-1; l-- ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed2[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = 0; k < numThreadsPerBlock; k++ ) - for( int l = numThreadsPerBlock-1; l >-1; l-- ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + /*aux.save( "aux-2pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = numThreadsPerBlock-1; k > -1; k-- ) + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed3[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) + /*aux.save( "aux-3pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - /*changed4[ k*numThreadsPerBlock + l ] = */this->updateCell( sArray, l+1, k+1, hx,hy); + this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); } } } - - for( int k = numThreadsPerBlock-1; k > -1; k-- ) - for( int l = numThreadsPerBlock-1; l >-1; l-- ){ - changed1[ 0 ] = changed1[ 0 ] || changed1[ k*numThreadsPerBlock + l ]; - /*changed2[ 0 ] = changed2[ 0 ] || changed2[ k*numThreadsPerBlock + l ]; - changed3[ 0 ] = changed3[ 0 ] || changed3[ k*numThreadsPerBlock + l ]; - changed4[ 0 ] = changed4[ 0 ] || changed4[ k*numThreadsPerBlock + l ];*/ + } + /*aux.save( "aux-4pruch.tnl" ); + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; } + std::cout << std::endl; + }*/ + - if( changed1[ 0 ] /*|| changed2[ 0 ] ||changed3[ 0 ] ||changed4[ 0 ]*/ ) + if( changed ){ BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 1; - + } + + for( int k = 0; k < numThreadsPerBlock; k++ ){ - for( int l = 0; l < numThreadsPerBlock; l++ ) { - if( blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l < dimX*dimY && - 
(!interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]) ) - aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ k + 1 ][ l + 1 ]; + for( int l = 0; l < numThreadsPerBlock; l++ ) { + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) + helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout< + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) @@ -249,643 +275,643 @@ getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ) for(int i = 0; i < numBlockX * numBlockY; i++) { - BlockIterPom[ i ] = 0; - if( BlockIterHost[ i ] ) - { - // i = k*numBlockY + m; - int m=0, k=0; - m = i%numBlockX; - k = i/numBlockX; - if( k > 0 ) - BlockIterPom[i - numBlockX] = 1; - if( k < numBlockY - 1 ) - BlockIterPom[i + numBlockX] = 1; - - if( m < numBlockX - 1 ) - BlockIterPom[ i+1 ] = 1; - if( m > 0 ) - BlockIterPom[ i-1 ] = 1; + BlockIterPom[ i ] = 0;//BlockIterPom[ i ] = 0; + int m=0, k=0; + m = i%numBlockX; + k = i/numBlockX; + if( m > 0 && BlockIterHost[ i - 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ + BlockIterPom[ i ] = 1; } + //BlockIterPom[ i ]; } - for(int i = 0; i < numBlockX * numBlockY; i++ ) - //if( !BlockIter[ i ] ) - BlockIterHost[ i ] = BlockIterPom[ i ]; - /*else - BlockIter[ i ] = 0;*/ - /*for( int i = numBlockX-1; i > -1; i-- ) + + for(int i = 0; i < numBlockX * numBlockY; i++) { - for( int j = 0; j< numBlockY; j++ ) - std::cout << BlockIterHost[ i*numBlockY + j ]; 
- std::cout << std::endl; + if( !BlockIterHost[ i ] ) + BlockIterHost[ i ] = BlockIterPom[ i ]; } - std::cout << std::endl;*/ + /*else + BlockIter[ i ] = 0;*/ + /*for( int i = numBlockX-1; i > -1; i-- ) + { + for( int j = 0; j< numBlockY; j++ ) + std::cout << BlockIterHost[ i*numBlockY + j ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ delete[] BlockIterPom; } template< typename Real, - typename Device, - typename Index > - template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > void tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v ) + const MeshEntity& cell, + const RealType v ) { - const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); - const MeshType& mesh = cell.getMesh(); - const RealType& h = mesh.getSpaceSteps().x(); - const RealType value = u( cell ); - RealType a, tmp = std::numeric_limits< RealType >::max(); - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1 >() ]; - else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1 >() ]; - else - { - a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], - u[ neighborEntities.template getEntityIndex< 1 >() ] ); - } - - if( fabs( a ) == std::numeric_limits< RealType >::max() ) - return; - - tmp = a + TNL::sign( value ) * h/v; - - u[ cell.getIndex() ] = argAbsMin( value, tmp ); + const auto& neighborEntities = cell.template getNeighborEntities< 1 >(); + const MeshType& mesh = cell.getMesh(); + const RealType& h = mesh.getSpaceSteps().x(); + const RealType value = u( cell ); + RealType a, tmp = std::numeric_limits< RealType >::max(); + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ 
neighborEntities.template getEntityIndex< -1 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1 >() ], + u[ neighborEntities.template getEntityIndex< 1 >() ] ); + } + + if( fabs( a ) == std::numeric_limits< RealType >::max() ) + return; + + tmp = a + TNL::sign( value ) * h/v; + + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& _interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - - if( std::is_same< Device, Devices::Cuda >::value ) - { + + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - dim3 blockSize( cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + dim3 blockSize( cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), + 
_output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - MeshFunctionType input = _input.getData(); - - /*double A[320][320]; - std::ifstream fileInit("/home/maty/Downloads/initData.txt"); - - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - fileInit >> A[i][j]; - fileInit.close(); - for (int i = 0; i < 320; i++) - for (int j = 0; j < 320; j++) - input[i*320 + j] = A[i][j];*/ - - - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - const MeshType& mesh = input.getMesh(); - typedef typename MeshType::Cell Cell; - Cell cell( mesh ); - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) >= 0 ? 
std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - for( cell.getCoordinates().y() = 0; + } + if( std::is_same< Device, Devices::Host >::value ) + { + MeshFunctionType input = _input.getData(); + + /*double A[320][320]; + std::ifstream fileInit("/home/maty/Downloads/initData.txt"); + + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + fileInit >> A[j]; + fileInit.close(); + for (int i = 0; i < 320; i++) + for (int j = 0; j < 320; j++) + input[i*320 + j] = A[j];*/ + + + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + const MeshType& mesh = input.getMesh(); + typedef typename MeshType::Cell Cell; + Cell cell( mesh ); + for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) >= 0 ? std::numeric_limits< RealType >::max() : + - std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! 
cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); + const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); + //Try init with exact data: + /*if( c * input[ n ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ n ] = input[ n ]; + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; + } + if( c * input[ e ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ e ] = input[ e ]; + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + }*/ + if( c * input[ n ] <= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! cell.isBoundaryEntity() ) + /*if( c >= 0 ) + {*/ + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + pom = pom - TNL::sign( c )*hy; + if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) + output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; + /*}else { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); - const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); - //Try init with exact data: - /*if( c * input[ n ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ n ] = input[ n ]; - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ e ] = input[ e ]; - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - }*/ - if( c * input[ n ] <= 0 ) - { - /*if( c >= 0 ) - {*/ - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - pom = pom - TNL::sign( c )*hy; - if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) - output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; - /*}else - { - pom = - ( hy * c )/( c - 
input[ n ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - if( output[ n ] > hy + pom ) - output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); - }*/ - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - /*if( c >= 0 ) - {*/ - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) - output[ cell.getIndex() ] = pom; - - pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) - output[ e ] = pom; - /*}else - { - pom = - (hx * c)/( c - input[ e ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ e ] > pom ) - output[ e ] = pom; - }*/ - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - } + pom = - ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + if( output[ n ] > hy + pom ) + output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); + }*/ + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; } + if( c * input[ e ] <= 0 ) + { + /*if( c >= 0 ) + {*/ + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) + output[ cell.getIndex() ] = pom; + + pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) + output[ e ] = pom; + /*}else + { + pom = - (hx * c)/( c - input[ e ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ e ] > pom ) + output[ e ] = pom; + }*/ + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + } } + } } template< typename Real, - typename Device, - typename Index > - 
template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v) + const MeshEntity& cell, + const RealType v) { - const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); - const MeshType& mesh = cell.getMesh(); - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType value = u( cell ); - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; - else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1, 0 >() ]; - else - { - a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0 >() ], - u[ neighborEntities.template getEntityIndex< 1, 0 >() ] ); - } - - if( cell.getCoordinates().y() == 0 ) - b = u[ neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) - b = u[ neighborEntities.template getEntityIndex< 0, -1 >() ]; - else - { - b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1 >() ], - u[ neighborEntities.template getEntityIndex< 0, 1 >() ] ); - } - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return; - /*if( fabs( a ) == TypeInfo< Real >::getMaxValue() || - fabs( b ) == TypeInfo< Real >::getMaxValue() || - fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) + const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); + const MeshType& mesh = cell.getMesh(); + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType value = u( cell ); + RealType a, b, tmp = 
std::numeric_limits< RealType >::max(); + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ neighborEntities.template getEntityIndex< -1, 0 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0 >() ], + u[ neighborEntities.template getEntityIndex< 1, 0 >() ] ); + } + + if( cell.getCoordinates().y() == 0 ) + b = u[ neighborEntities.template getEntityIndex< 0, 1 >()]; + else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) + b = u[ neighborEntities.template getEntityIndex< 0, -1 >() ]; + else + { + b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1 >() ], + u[ neighborEntities.template getEntityIndex< 0, 1 >() ] ); + } + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return; + /*if( fabs( a ) == TypeInfo< Real >::getMaxValue() || + fabs( b ) == TypeInfo< Real >::getMaxValue() || + fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - tmp = - fabs( a ) >= fabs( b ) ? b + TNL::sign( value ) * hy : - a + TNL::sign( value ) * hx; + tmp = + fabs( a ) >= fabs( b ) ? 
b + TNL::sign( value ) * hy : + a + TNL::sign( value ) * hx; }*/ - /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( a - b ) < TNL::sqrt( (hx * hx + hy * hy)/v ) ) + /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( a - b ) < TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - tmp = ( hx * hx * b + hy * hy * a + - sign( value ) * hx * hy * TNL::sqrt( ( hx * hx + hy * hy )/v - - ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); - u[ cell.getIndex() ] = tmp; + tmp = ( hx * hx * b + hy * hy * a + + sign( value ) * hx * hy * TNL::sqrt( ( hx * hx + hy * hy )/v - + ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); + u[ cell.getIndex() ] = tmp; } else { - tmp = - fabs( a ) > fabs( b ) ? b + TNL::sign( value ) * hy/v : - a + TNL::sign( value ) * hx/v; - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - //tmp = TypeInfo< RealType >::getMaxValue(); + tmp = + fabs( a ) > fabs( b ) ? b + TNL::sign( value ) * hy/v : + a + TNL::sign( value ) * hx/v; + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + //tmp = TypeInfo< RealType >::getMaxValue(); }*/ - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - 
pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + } } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: initInterface( const MeshFunctionPointer& _input, - MeshFunctionPointer& _output, - InterfaceMapPointer& _interfaceMap ) + MeshFunctionPointer& _output, + InterfaceMapPointer& _interfaceMap ) { - if( std::is_same< Device, Devices::Cuda >::value ) - { + if( std::is_same< Device, Devices::Cuda >::value ) + { #ifdef HAVE_CUDA - const MeshType& mesh = _input->getMesh(); - - const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); - if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) - std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; - dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - Devices::Cuda::synchronizeDevice(); - CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), - _output.template modifyData< Device >(), - _interfaceMap.template modifyData< Device >() ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; + const MeshType& mesh = _input->getMesh(); + + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + Devices::Cuda::synchronizeDevice(); + CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), + _output.template modifyData< Device >(), + _interfaceMap.template modifyData< Device >() ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; #endif - } - if( std::is_same< Device, Devices::Host >::value ) - { - const MeshFunctionType& input = _input.getData(); - MeshFunctionType& output = _output.modifyData(); - InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); - const MeshType& mesh = input.getMesh(); - typedef typename MeshType::Cell Cell; - Cell cell( mesh ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) - { - cell.refresh(); - output[ cell.getIndex() ] = - input( cell ) > 0 ? 
std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); - interfaceMap[ cell.getIndex() ] = false; - } - - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType& hz = mesh.getSpaceSteps().z(); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh.getDimensions().z(); - cell.getCoordinates().z() ++ ) - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh.getDimensions().y(); - cell.getCoordinates().y() ++ ) - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh.getDimensions().x(); - cell.getCoordinates().x() ++ ) + } + if( std::is_same< Device, Devices::Host >::value ) + { + const MeshFunctionType& input = _input.getData(); + MeshFunctionType& output = _output.modifyData(); + InterfaceMapType& interfaceMap = _interfaceMap.modifyData(); + const MeshType& mesh = input.getMesh(); + typedef typename MeshType::Cell Cell; + Cell cell( mesh ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + output[ cell.getIndex() ] = + input( cell ) > 0 ? 
std::numeric_limits< RealType >::max() : + - std::numeric_limits< RealType >::max(); + interfaceMap[ cell.getIndex() ] = false; + } + + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType& hz = mesh.getSpaceSteps().z(); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh.getDimensions().z(); + cell.getCoordinates().z() ++ ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh.getDimensions().y(); + cell.getCoordinates().y() ++ ) + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh.getDimensions().x(); + cell.getCoordinates().x() ++ ) + { + cell.refresh(); + const RealType& c = input( cell ); + if( ! cell.isBoundaryEntity() ) + { + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); + //Try exact initiation + /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); + const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); + if( c * input[ e ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ e ] = input[ e ]; + interfaceMap[ e ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ n ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ n ] = input[ n ]; + interfaceMap[ n ] = true; + interfaceMap[ cell.getIndex() ] = true; + } + else if( c * input[ t ] <= 0 ) + { + output[ cell.getIndex() ] = c; + output[ t ] = input[ t ]; + interfaceMap[ t ] = true; + interfaceMap[ cell.getIndex() ] = true; + }*/ + if( c * input[ n ] <= 0 ) + { + if( c >= 0 ) { - cell.refresh(); - const RealType& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const IndexType e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const IndexType n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const IndexType t = neighbors.template getEntityIndex< 0, 0, 1 >(); - //Try exact initiation - /*const IndexType w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const IndexType s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const IndexType b = neighbors.template getEntityIndex< 0, 0, -1 >(); - if( c * input[ e ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ e ] = input[ e ]; - interfaceMap[ e ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ n ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ n ] = input[ n ]; - interfaceMap[ n ] = true; - interfaceMap[ cell.getIndex() ] = true; - } - else if( c * input[ t ] <= 0 ) - { - output[ cell.getIndex() ] = c; - output[ t ] = input[ t ]; - interfaceMap[ t ] = true; - interfaceMap[ cell.getIndex() ] = true; - }*/ - if( c * input[ n ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - if ( output[ n ] < pom - hy) - output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; - - }else - { - pom = - ( hy * c )/( c - input[ n ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - if( output[ n ] > hy + pom ) - output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ n ] = true; - } - if( c * input[ e ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hx * c )/( c - input[ e ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ e ] < pom ) - output[ e ] = pom; - - }else - { - pom = - (hx * c)/( c - input[ e ]); - if( output[ cell.getIndex() ] < pom ) - 
output[ cell.getIndex() ] = pom; - - pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ e ] > pom ) - output[ e ] = pom; - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ e ] = true; - } - if( c * input[ t ] <= 0 ) - { - if( c >= 0 ) - { - pom = ( hz * c )/( c - input[ t ]); - if( output[ cell.getIndex() ] > pom ) - output[ cell.getIndex() ] = pom; - - pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; - if( output[ t ] < pom ) - output[ t ] = pom; - - }else - { - pom = - (hz * c)/( c - input[ t ]); - if( output[ cell.getIndex() ] < pom ) - output[ cell.getIndex() ] = pom; - - pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); - if( output[ t ] > pom ) - output[ t ] = pom; - - } - interfaceMap[ cell.getIndex() ] = true; - interfaceMap[ t ] = true; - } - } - /*output[ cell.getIndex() ] = - c > 0 ? TypeInfo< RealType >::getMaxValue() : - -TypeInfo< RealType >::getMaxValue(); - interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 + pom = ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + if ( output[ n ] < pom - hy) + output[ n ] = pom - hy; // ( hy * c )/( c - input[ n ]) - hy; + + }else + { + pom = - ( hy * c )/( c - input[ n ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + if( output[ n ] > hy + pom ) + output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); + } - } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ n ] = true; + } + if( c * input[ e ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hx * c )/( c - input[ e ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ e ] < pom ) + output[ e ] = pom; + + }else + { + pom = - (hx * c)/( c - input[ e ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - 
input[ e ]); + if( output[ e ] > pom ) + output[ e ] = pom; + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ e ] = true; + } + if( c * input[ t ] <= 0 ) + { + if( c >= 0 ) + { + pom = ( hz * c )/( c - input[ t ]); + if( output[ cell.getIndex() ] > pom ) + output[ cell.getIndex() ] = pom; + + pom = pom - hz; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; + if( output[ t ] < pom ) + output[ t ] = pom; + + }else + { + pom = - (hz * c)/( c - input[ t ]); + if( output[ cell.getIndex() ] < pom ) + output[ cell.getIndex() ] = pom; + + pom = pom + hz; //output[ e ] = hx - (hx * c)/( c - input[ e ]); + if( output[ t ] > pom ) + output[ t ] = pom; + + } + interfaceMap[ cell.getIndex() ] = true; + interfaceMap[ t ] = true; + } + } + /*output[ cell.getIndex() ] = + c > 0 ? TypeInfo< RealType >::getMaxValue() : + -TypeInfo< RealType >::getMaxValue(); + interfaceMap[ cell.getIndex() ] = false;*/ //is on line 245 + } + } } template< typename Real, - typename Device, - typename Index > - template< typename MeshEntity > + typename Device, + typename Index > +template< typename MeshEntity > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType v ) + const MeshEntity& cell, + const RealType v ) { - const auto& neighborEntities = cell.template getNeighborEntities< 3 >(); - const MeshType& mesh = cell.getMesh(); + const auto& neighborEntities = cell.template getNeighborEntities< 3 >(); + const MeshType& mesh = cell.getMesh(); - const RealType& hx = mesh.getSpaceSteps().x(); - const RealType& hy = mesh.getSpaceSteps().y(); - const RealType& hz = mesh.getSpaceSteps().z(); - const RealType value = u( cell ); - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - - if( cell.getCoordinates().x() == 0 ) - a = u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ]; - else if( cell.getCoordinates().x() == 
mesh.getDimensions().x() - 1 ) - a = u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ]; - else + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y(); + const RealType& hz = mesh.getSpaceSteps().z(); + const RealType value = u( cell ); + //std::cout << value << std::endl; + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + + if( cell.getCoordinates().x() == 0 ) + a = u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ]; + else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 ) + a = u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ]; + else + { + a = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ], + u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] ); + } + if( cell.getCoordinates().y() == 0 ) + b = u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ]; + else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) + b = u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ]; + else + { + b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ], + u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] ); + }if( cell.getCoordinates().z() == 0 ) + c = u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ]; + else if( cell.getCoordinates().z() == mesh.getDimensions().z() - 1 ) + c = u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ]; + else + { + c = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ], + u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] ); + } + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() && + fabs( c ) == std::numeric_limits< RealType >::max() ) + return; + + + /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) { - a = TNL::argAbsMin( u[ 
neighborEntities.template getEntityIndex< -1, 0, 0 >() ], - u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] ); + tmp = ( hx * hx * a + hy * hy * b + + sign( value ) * hx * hy * sqrt( ( hx * hx + hy * hy )/v - + ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); } - if( cell.getCoordinates().y() == 0 ) - b = u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ]; - else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 ) - b = u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ]; - else - { - b = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ], - u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] ); - }if( cell.getCoordinates().z() == 0 ) - c = u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ]; - else if( cell.getCoordinates().z() == mesh.getDimensions().z() - 1 ) - c = u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ]; - else + if( fabs( a ) != TypeInfo< Real >::getMaxValue() && + fabs( c ) != TypeInfo< Real >::getMaxValue() && + fabs( a - c ) >= TNL::sqrt( (hx * hx + hz * hz)/v ) ) { - c = TNL::argAbsMin( u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ], - u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] ); + tmp = ( hx * hx * a + hz * hz * c + + sign( value ) * hx * hz * sqrt( ( hx * hx + hz * hz )/v - + ( a - c ) * ( a - c ) ) )/( hx * hx + hz * hz ); } - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) - return; - - - /*if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( a - b ) >= TNL::sqrt( (hx * hx + hy * hy)/v ) ) - { - tmp = ( hx * hx * a + hy * hy * b + - sign( value ) * hx * hy * sqrt( ( hx * hx + hy * hy )/v - - ( a - b ) * ( a - b ) ) )/( hx * hx + hy * hy ); - } - if( fabs( a ) != TypeInfo< Real >::getMaxValue() && - fabs( c ) != TypeInfo< Real >::getMaxValue() && - 
fabs( a - c ) >= TNL::sqrt( (hx * hx + hz * hz)/v ) ) - { - tmp = ( hx * hx * a + hz * hz * c + - sign( value ) * hx * hz * sqrt( ( hx * hx + hz * hz )/v - - ( a - c ) * ( a - c ) ) )/( hx * hx + hz * hz ); - } - if( fabs( b ) != TypeInfo< Real >::getMaxValue() && - fabs( c ) != TypeInfo< Real >::getMaxValue() && - fabs( b - c ) >= TNL::sqrt( (hy * hy + hz * hz)/v ) ) - { - tmp = ( hy * hy * b + hz * hz * c + - sign( value ) * hy * hz * sqrt( ( hy * hy + hz * hz )/v - - ( b - c ) * ( b - c ) ) )/( hy * hy + hz * hz ); - }*/ - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + if( fabs( b ) != TypeInfo< Real >::getMaxValue() && + fabs( c ) != TypeInfo< Real >::getMaxValue() && + fabs( b - c ) >= TNL::sqrt( (hy * hy + hz * hz)/v ) ) + { + tmp = ( hy * hy * b + hz * hz * c + + sign( value ) * hy * hz * sqrt( ( hy * hy + hz * hz )/v - + ( b - c ) * ( b - c ) ) )/( hy * hy + hz * hz ); + }*/ + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + u[ cell.getIndex() ] = argAbsMin( value, tmp ); + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) { - u[ cell.getIndex() ] = argAbsMin( value, tmp ); + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } else { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + 
pom[ 4 ] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - u[ cell.getIndex() ] = argAbsMin( value, tmp ); - } + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + u[ cell.getIndex() ] = argAbsMin( value, tmp ); } + } } template < typename T1, typename T2 > T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double v) { - T1 tmp; - if( fabs( a ) != std::numeric_limits< T1 >::max && - fabs( b ) != std::numeric_limits< T1 >::max && - fabs( a - b ) < ha/v )//TNL::sqrt( (ha * ha + hb * hb)/2 )/v ) - { - tmp = ( ha * ha * b + hb * hb * a + + T1 tmp; + if( fabs( a ) != std::numeric_limits< T1 >::max && + fabs( b ) != std::numeric_limits< T1 >::max && + fabs( a - b ) < ha/v )//TNL::sqrt( (ha * ha + hb * hb)/2 )/v ) + { + tmp = ( ha * ha * b + hb * hb * a + TNL::sign( value ) * ha * hb * TNL::sqrt( ( ha * ha + hb * hb )/( v * v ) - ( a - b ) * ( a - b ) ) )/( ha * ha + hb * hb ); - } - else - { - tmp = std::numeric_limits< T1 >::max; - } - - return tmp; + } + else + { + tmp = std::numeric_limits< T1 >::max; + } + + return tmp; } template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ) { - T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; - if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= 
fabs(pom[2])){ - tmp[0] = pom[0]; tmp[1] = pom[1]; tmp[2] = pom[2]; - tmp[3] = pom[3]; tmp[4] = pom[4]; tmp[5] = pom[5]; - - } - else if( fabs(pom[0]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[1]) ){ - tmp[0] = pom[0]; tmp[1] = pom[2]; tmp[2] = pom[1]; - tmp[3] = pom[3]; tmp[4] = pom[5]; tmp[5] = pom[4]; - } - else if( fabs(pom[1]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[2]) ){ - tmp[0] = pom[1]; tmp[1] = pom[0]; tmp[2] = pom[2]; - tmp[3] = pom[4]; tmp[4] = pom[3]; tmp[5] = pom[5]; - } - else if( fabs(pom[1]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[0]) ){ - tmp[0] = pom[1]; tmp[1] = pom[2]; tmp[2] = pom[0]; - tmp[3] = pom[4]; tmp[4] = pom[5]; tmp[5] = pom[3]; - } - else if( fabs(pom[2]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[1]) ){ - tmp[0] = pom[2]; tmp[1] = pom[0]; tmp[2] = pom[1]; - tmp[3] = pom[5]; tmp[4] = pom[3]; tmp[5] = pom[4]; - } - else if( fabs(pom[2]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[0]) ){ - tmp[0] = pom[2]; tmp[1] = pom[1]; tmp[2] = pom[0]; - tmp[3] = pom[5]; tmp[4] = pom[4]; tmp[5] = pom[3]; - } + T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; + if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){ + tmp[0] = pom[0]; tmp[1] = pom[1]; tmp[2] = pom[2]; + tmp[3] = pom[3]; tmp[4] = pom[4]; tmp[5] = pom[5]; - for( int i = 0; i < 6; i++ ) - { - pom[ i ] = tmp[ i ]; - } + } + else if( fabs(pom[0]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[1]) ){ + tmp[0] = pom[0]; tmp[1] = pom[2]; tmp[2] = pom[1]; + tmp[3] = pom[3]; tmp[4] = pom[5]; tmp[5] = pom[4]; + } + else if( fabs(pom[1]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[2]) ){ + tmp[0] = pom[1]; tmp[1] = pom[0]; tmp[2] = pom[2]; + tmp[3] = pom[4]; tmp[4] = pom[3]; tmp[5] = pom[5]; + } + else if( fabs(pom[1]) <= fabs(pom[2]) && fabs(pom[2]) <= fabs(pom[0]) ){ + tmp[0] = pom[1]; tmp[1] = pom[2]; tmp[2] = pom[0]; + tmp[3] = pom[4]; tmp[4] = pom[5]; tmp[5] = pom[3]; + } + else if( fabs(pom[2]) <= fabs(pom[0]) && fabs(pom[0]) <= fabs(pom[1]) ){ + tmp[0] = pom[2]; tmp[1] = pom[0]; 
tmp[2] = pom[1]; + tmp[3] = pom[5]; tmp[4] = pom[3]; tmp[5] = pom[4]; + } + else if( fabs(pom[2]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[0]) ){ + tmp[0] = pom[2]; tmp[1] = pom[1]; tmp[2] = pom[0]; + tmp[3] = pom[5]; tmp[4] = pom[4]; tmp[5] = pom[3]; + } + + for( int i = 0; i < 6; i++ ) + { + pom[ i ] = tmp[ i ]; + } } @@ -893,372 +919,373 @@ __cuda_callable__ void sortMinims( T1 pom[] ) #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + const Meshes::Grid< 1, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() ) + { + typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; + cell.refresh(); + const Index cind = cell.getIndex(); + - if( i < mesh.getDimensions().x() ) + output[ cind ] = + input( cell ) >= 0 ? std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + + const Real& h = mesh.getSpaceSteps().x(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 1, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? 
std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - - const Real& h = mesh.getSpaceSteps().x(); - cell.refresh(); - const Real& c = input( cell ); - if( ! cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1 >(); - const Index w = neighbors.template getEntityIndex< -1 >(); - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( h * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1 >(); + const Index w = neighbors.template getEntityIndex< -1 >(); + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( h * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } - + } + } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = 
blockDim.y*blockIdx.y + threadIdx.y; - const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + const Meshes::Grid< 2, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + { + typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; + cell.refresh(); + const Index cind = cell.getIndex(); + - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + output[ cind ] = + input( cell ) >= 0 ? std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + cell.refresh(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - cell.refresh(); - const Real& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1 >(); - const Index s = neighbors.template getEntityIndex< 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cell.getIndex() ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1 >(); + const Index s = neighbors.template getEntityIndex< 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cell.getIndex() ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ 
cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } + } } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - int k = blockDim.z*blockIdx.z + threadIdx.z; - const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + const Meshes::Grid< 3, Real, Device, Index >& mesh = input.template getMesh< Devices::Cuda >(); + + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + { + typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; + cell.refresh(); + const Index cind = cell.getIndex(); + + + output[ cind ] = + input( cell ) >= 0 ? 
std::numeric_limits< Real >::max() : + - std::numeric_limits< Real >::max(); + interfaceMap[ cind ] = false; + cell.refresh(); - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + const Real& hx = mesh.getSpaceSteps().x(); + const Real& hy = mesh.getSpaceSteps().y(); + const Real& hz = mesh.getSpaceSteps().z(); + const Real& c = input( cell ); + if( ! cell.isBoundaryEntity() ) { - typedef typename Meshes::Grid< 3, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; - cell.refresh(); - const Index cind = cell.getIndex(); - - - output[ cind ] = - input( cell ) >= 0 ? std::numeric_limits< Real >::max() : - - std::numeric_limits< Real >::max(); - interfaceMap[ cind ] = false; - cell.refresh(); - - const Real& hx = mesh.getSpaceSteps().x(); - const Real& hy = mesh.getSpaceSteps().y(); - const Real& hz = mesh.getSpaceSteps().z(); - const Real& c = input( cell ); - if( ! 
cell.isBoundaryEntity() ) - { - auto neighbors = cell.getNeighborEntities(); - Real pom = 0; - const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); - const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); - const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); - const Index s = neighbors.template getEntityIndex< 0, -1, 0 >(); - const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); - const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); - - if( c * input[ n ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ e ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ w ] <= 0 ) - { - pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ s ] <= 0 ) - { - pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ b ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - if( c * input[ t ] <= 0 ) - { - pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); - if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) - output[ cind ] = pom; - - interfaceMap[ cind ] = true; - } - } + auto neighbors = cell.getNeighborEntities(); + Real pom = 0; + const Index e = neighbors.template getEntityIndex< 1, 0, 0 >(); + const Index w = neighbors.template getEntityIndex< -1, 0, 0 >(); + const Index n = neighbors.template getEntityIndex< 0, 1, 0 >(); + const Index s = neighbors.template 
getEntityIndex< 0, -1, 0 >(); + const Index t = neighbors.template getEntityIndex< 0, 0, 1 >(); + const Index b = neighbors.template getEntityIndex< 0, 0, -1 >(); + + if( c * input[ n ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ e ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ w ] <= 0 ) + { + pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ s ] <= 0 ) + { + pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ b ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ b ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } + if( c * input[ t ] <= 0 ) + { + pom = TNL::sign( c )*( hz * c )/( c - input[ t ]); + if( TNL::abs( output[ cind ] ) > TNL::abs( pom ) ) + output[ cind ] = pom; + + interfaceMap[ cind ] = true; + } } + } } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > +template< int sizeSArray > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: -updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, - const Real v ) +updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, + const Real v ) { - const RealType value = sArray[ thrj ][ thri ]; - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], - sArray[ thrj-1 ][ thri ] ); - 
- a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], - sArray[ thrj ][ thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } + const RealType value = sArray[ thrj * sizeSArray + thri ]; + RealType a, b, tmp = std::numeric_limits< RealType >::max(); + + b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], + sArray[ (thrj-1) * sizeSArray + thri ] ); + + a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], + sArray[ thrj * sizeSArray + thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - 
return true; - else - return false; - } - - return false; + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + + return false; } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >:: updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) { - const RealType value = sArray[ thri ]; - RealType a, tmp = std::numeric_limits< RealType >::max(); - - a = TNL::argAbsMin( sArray[ thri+1 ], - sArray[ thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() ) - return false; - - tmp = a + TNL::sign( value ) * h/v; - - - sArray[ thri ] = argAbsMin( value, tmp ); - - tmp = value - sArray[ thri ]; - if ( fabs( tmp ) > 0.001*h ) - return true; - else - return false; + const RealType value = sArray[ thri ]; + RealType a, tmp = std::numeric_limits< RealType >::max(); + + a = TNL::argAbsMin( sArray[ thri+1 ], + sArray[ thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() ) + return false; + + tmp = a + TNL::sign( value ) * h/v; + + + sArray[ thri ] = argAbsMin( value, tmp ); + + tmp = value - sArray[ thri ]; + if ( fabs( tmp ) > 0.001*h ) + return true; + else + return false; } template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > __cuda_callable__ bool tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, const Real v ) { - const RealType value = sArray[thrk][thrj][thri]; - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], - sArray[ thrk-1 ][ thrj ][ thri ] ); - 
- b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], - sArray[ thrk ][ thrj-1 ][ thri ] ); - - a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], - sArray[ thrk ][ thrj ][ thri-1 ] ); - - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) + const RealType value = sArray[thrk][thrj][thri]; + //std::cout << value << std::endl; + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], + sArray[ thrk-1 ][ thrj ][ thri ] ); + + b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], + sArray[ thrk ][ thrj-1 ][ thri ] ); + + a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], + sArray[ thrk ][ thrj ][ thri-1 ] ); + + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() && + fabs( c ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + + sortMinims( pom ); + + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else return false; - - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - - sortMinims( pom ); - - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ 
thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; } else { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk ][ thrj ][ thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; } - - return false; + } + + return false; } #endif diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index c6cc575d1..0efa38aa1 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -15,6 +15,7 @@ #include "tnlFastSweepingMethod.h" #include +#include #include @@ -80,115 +81,171 @@ solve( const MeshPointer& mesh, - while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 16; + int numThreadsPerBlock = 1024; + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; + + /*Real **sArray = new Real*[numBlocksX*numBlocksY]; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + sArray[ i ] = new Real [ (numThreadsPerBlock + 2)*(numThreadsPerBlock + 2)];*/ - ArrayContainer BlockIterHost; BlockIterHost.setSize( numBlocksX * numBlocksY ); BlockIterHost.setValue( 1 ); + int IsCalculationDone = 1; + + MeshFunctionPointer helpFunc( mesh ); + MeshFunctionPointer helpFunc1( mesh ); + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + //std::cout<< "Size = " << BlockIterHost.getSize() << std::endl; /*for( int k = numBlocksX-1; k >-1; k-- ){ - for( int l = 0; l < numBlocksY; l++ ){ - std::cout<< BlockIterHost[ l*numBlocksX + k ]; + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + + for( int i = 0; i < BlockIterHost.getSize(); i++ ){ + if( 
IsCalculationDone == 0 ){ + IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; + //break; + } } - std::cout<updateBlocks( interfaceMap, aux, BlockIterHost, numThreadsPerBlock); + numWhile++; + + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl; this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY ); - //Reduction - for( int k = numBlocksX-1; k >-1; k-- ){ - for( int l = 0; l < numBlocksY; l++ ){ - //std::cout<< BlockIterHost[ l*numBlocksX + k ]; - BlockIterHost[ 0 ] = BlockIterHost[ 0 ] || BlockIterHost[ l*numBlocksX + k ]; - } - //std::cout<-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << "BlockIterHost = "<< j*numBlocksX + i<< " ," << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + //Reduction + //std::cout<getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } + if( numWhile == 1 ){ + auxPtr = helpFunc; } + /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + delete []sArray[i];*/ - //aux.save( "aux-2.tnl" ); - - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - //aux.save( "aux-3.tnl" ); + /*for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-2.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + + for( int j = 0; + j < mesh->getDimensions().y(); + j++ ) + { + for( int i = 0; + i < mesh->getDimensions().x(); + i++ ) + { + std::cout << aux[ i * mesh->getDimensions().y() + j ] << " "; + } + std::cout << std::endl; + }*/ - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - }*/ } if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code #ifdef HAVE_CUDA TNL_CHECK_CUDA_DEVICE; + // Maximum cudaBlockSite is 32. Because of maximum num. of threads in kernel. 
const int cudaBlockSize( 16 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); @@ -202,19 +259,14 @@ solve( const MeshPointer& mesh, BlockIterDevice.setSize( numBlocksX * numBlocksY ); BlockIterDevice.setValue( 1 ); TNL_CHECK_CUDA_DEVICE; - int ne = 0; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, ne); - TNL_CHECK_CUDA_DEVICE; + /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; - BlockIterPom.setSize( numBlocksX * numBlocksY ); - BlockIterPom.setValue( 0 );*/ + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 );*/ /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); @@ -223,59 +275,125 @@ solve( const MeshPointer& mesh, /*int *BlockIterPom; cudaMalloc((void**) &BlockIterPom, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ - int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); + int nBlocks = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 
1:0); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); + dBlock.setSize( nBlocks ); TNL_CHECK_CUDA_DEVICE; /*int *dBlock; cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ - //int pocIter = 0; + + + MeshFunctionPointer helpFunc1; + helpFunc1->setMesh(mesh); + + MeshFunctionPointer helpFunc( mesh ); + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + + int numIter = 0; + + //int oddEvenBlock = 0; while( BlockIterD ) { - /*BlockIterPom1 = BlockIterDevice; - for( int j = numBlocksY-1; j>-1; j-- ){ - for( int i = 0; i < numBlocksX; i++ ) - std::cout << BlockIterPom1[ j * numBlocksX + i ]; - std::cout << std::endl; - } - std::cout << std::endl;*/ + /** HERE IS CHESS METHOD **/ + + /*auxPtr = helpFunc; - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice, 1); + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; - /*int poc = 0; - for( int i = 0; i < numBlocksX * numBlocksY; i++ ) - if( BlockIterPom1[ i ] ) - poc = poc+1; - std::cout << "pocet bloku, ktere se pocitali = " << poc << std::endl;*/ + oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; - GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, /*BlockIterPom,*/ numBlocksX, numBlocksY ); + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - - BlockIterD = dBlock.getElement( 0 ); - //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + BlockIterD = dBlock.getElement( 0 );*/ + + /**------------------------------------------------------------------------------------------------*/ + + + /** HERE IS FIM **/ + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + + //int pocBloku = 0; + Devices::Cuda::synchronizeDevice(); + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template modifyData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; + + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, numBlocksX, numBlocksY ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + //std::cout<< "Probehlo" << std::endl; + + //TNL::swap( auxPtr, helpFunc ); + + + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + 
TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + TNL_CHECK_CUDA_DEVICE; + + + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + + /**-----------------------------------------------------------------------------------------------------------*/ /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - //pocIter ++; + numIter ++; } + if( numIter == 1 ){ + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + } + /*cudaFree( BlockIterDevice ); + cudaFree( dBlock ); + delete BlockIter;*/ cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //std::cout<< pocIter << std::endl; + TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; interfaceMap = *interfaceMapPtr; @@ -286,10 +404,13 @@ solve( const MeshPointer& mesh, aux.save("aux-final.tnl"); } + #ifdef HAVE_CUDA + + template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) + /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; @@ -299,53 +420,68 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index int m=0, k=0; m = i%numBlockX; k = i/numBlockX; - if( m > 0 ) - if( BlockIterDevice[ i - 1 ] ) - pom = 1;//BlockIterPom[ i ] = 1; - if( m < numBlockX -1 && pom == 0 ) - if( BlockIterDevice[ i + 1 ] ) - pom = 1;//BlockIterPom[ i ] = 1; - if( k > 0 && pom == 0 ) - if( BlockIterDevice[ i - numBlockX ] ) - pom = 1;// BlockIterPom[ i ] = 1; - if( k < numBlockY -1 && pom == 0 ) - if( BlockIterDevice[ i + numBlockX ] ) - pom = 1;//BlockIterPom[ i ] = 1; + if( m > 0 && BlockIterDevice[ i - 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + 
}else if( m < numBlockX -1 && BlockIterDevice[ i + 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterDevice[ i - numBlockX ] ){ + pom = 1;// BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterDevice[ i + numBlockX ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + } - - BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; } } template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ) { int i = threadIdx.x; int blId = blockIdx.x; - __shared__ volatile int sArray[ 512 ]; + int blockSize = blockDim.x; + /*if ( i == 0 && blId == 0 ){ + printf( "nBlocks = %d \n", nBlocks ); + for( int j = nBlocks-1; j > -1 ; j--){ + printf( "cislo = %d \n", BlockIterDevice[ j ] ); + } + }*/ + __shared__ int sArray[ 1024 ]; sArray[ i ] = 0; - if(blId * 512 + i < nBlocks ) - sArray[ i ] = BlockIterDevice[ blId * 512 + i ]; + if( blId * 1024 + i < nBlocks ) + sArray[ i ] = BlockIterDevice[ blId * 1024 + i ]; __syncthreads(); - if (blockDim.x == 1024) { + /*extern __shared__ volatile int sArray[]; + unsigned int i = threadIdx.x; + unsigned int gid = blockIdx.x * blockSize * 2 + threadIdx.x; + unsigned int gridSize = blockSize * 2 * gridDim.x; + sArray[ i ] = 0; + while( gid < nBlocks ) + { + sArray[ i ] += BlockIterDevice[ gid ] + BlockIterDevice[ gid + blockSize ]; + gid += gridSize; + } + __syncthreads();*/ + + if ( blockSize == 1024) { if (i < 512) sArray[ i ] += sArray[ i + 512 ]; } __syncthreads(); - if (blockDim.x >= 512) { + if (blockSize >= 512) { if (i < 256) { sArray[ i ] += sArray[ i + 256 ]; } } - if (blockDim.x >= 256) { + __syncthreads(); + if (blockSize >= 256) { if (i < 128) { sArray[ i ] += sArray[ i + 128 ]; } } __syncthreads(); - if (blockDim.x >= 128) { + if (blockSize >= 128) { if (i < 64) { sArray[ i ] += sArray[ i + 64 ]; } @@ 
-353,12 +489,12 @@ __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, I __syncthreads(); if (i < 32 ) { - if( blockDim.x >= 64 ) sArray[ i ] += sArray[ i + 32 ]; - if( blockDim.x >= 32 ) sArray[ i ] += sArray[ i + 16 ]; - if( blockDim.x >= 16 ) sArray[ i ] += sArray[ i + 8 ]; - if( blockDim.x >= 8 ) sArray[ i ] += sArray[ i + 4 ]; - if( blockDim.x >= 4 ) sArray[ i ] += sArray[ i + 2 ]; - if( blockDim.x >= 2 ) sArray[ i ] += sArray[ i + 1 ]; + if( blockSize >= 64 ) sArray[ i ] += sArray[ i + 32 ]; + if( blockSize >= 32 ) sArray[ i ] += sArray[ i + 16 ]; + if( blockSize >= 16 ) sArray[ i ] += sArray[ i + 8 ]; + if( blockSize >= 8 ) sArray[ i ] += sArray[ i + 4 ]; + if( blockSize >= 4 ) sArray[ i ] += sArray[ i + 2 ]; + if( blockSize >= 2 ) sArray[ i ] += sArray[ i + 1 ]; } if( i == 0 ) @@ -367,94 +503,120 @@ __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, I -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int ne ) + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; - int grIdx = gridDim.x; - - if( BlockIterDevice[ blIdy * grIdx + blIdx] ) + int i = threadIdx.x + blockDim.x*blockIdx.x; 
+ int j = blockDim.y*blockIdx.y + threadIdx.y; + /** FOR CHESS METHOD */ + if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) { - - const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + /**-----------------------------------------*/ + - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ int xkolik; - __shared__ int ykolik; - __shared__ volatile int NE; - if( thri == 0 && thrj == 0 ) + /** FOR FIM METHOD */ + /*if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x] ) + {*/ + /**-----------------------------------------*/ + const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + __shared__ volatile int dimX; + __shared__ volatile int dimY; + __shared__ volatile Real hx; + __shared__ volatile Real hy; + if( thri==0 && thrj == 0) { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); - - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - BlockIterDevice[ blIdy * grIdx + blIdx ] = 0; - NE = ne; + dimX = mesh.getDimensions().x(); + dimY = mesh.getDimensions().y(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); + BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] = 0; } __syncthreads(); - - int i = thri + blockDim.x*blIdx; - int j = blockDim.y*blIdy + thrj; + int numOfBlockx; + int numOfBlocky; + int xkolik; + int ykolik; + + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); + numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 
1:0); + + if( numOfBlockx - 1 == blockIdx.x ) + xkolik = dimX - (blockIdx.x)*blockDim.x+1; + + if( numOfBlocky -1 == blockIdx.y ) + ykolik = dimY - (blockIdx.y)*blockDim.y+1; + __syncthreads(); + int currentIndex = thrj * blockDim.x + thri; - if( BlockIterDevice[ blIdy * gridDim.x + blIdx] ) - { //__shared__ volatile bool changed[ blockDim.x*blockDim.y ]; - __shared__ volatile bool changed[16*16]; + __shared__ volatile bool changed[ (sizeSArray-2)*(sizeSArray-2)]; changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 ) changed[ 0 ] = true; - __shared__ Real hx; - __shared__ Real hy; - if( thrj == 1 && thri == 1 ) - { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); - } //__shared__ volatile Real sArray[ blockDim.y+2 ][ blockDim.x+2 ]; - __shared__ volatile Real sArray[18][18]; - sArray[thrj][thri] = std::numeric_limits< Real >::max(); + __shared__ volatile Real sArray[ sizeSArray * sizeSArray ]; + sArray[ thrj * sizeSArray + thri ] = std::numeric_limits< Real >::max(); //filling sArray edges if( thri == 0 ) - { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && NE == 1 ) - sArray[thrj+1][xkolik] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; + { + if( dimX > (blockIdx.x+1) * blockDim.x && thrj+1 < ykolik ) + sArray[(thrj+1)*sizeSArray + xkolik] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + (thrj+1)*dimX + xkolik ]; else - sArray[thrj+1][xkolik] = std::numeric_limits< Real >::max(); + sArray[(thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { - if( blIdx != 0 && thrj+1 < ykolik && NE == 1 ) - sArray[thrj+1][0] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + (thrj+1)*dimX ]; + if( blockIdx.x != 0 && thrj+1 < ykolik ) + sArray[(thrj+1)*sizeSArray + 0] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + (thrj+1)*dimX ]; else - sArray[thrj+1][0] = std::numeric_limits< Real >::max(); + 
sArray[(thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { - if( dimY > (blIdy+1) * blockDim.y && thri+1 < xkolik && NE == 1 ) - sArray[ykolik][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + if( dimY > (blockIdx.y+1) * blockDim.y && thrj+1 < xkolik ) + sArray[ ykolik*sizeSArray + thrj+1 ] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + ykolik*dimX + thrj+1 ]; + else + sArray[ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); + + } + + if( thri == 3 ) + { + if( blockIdx.y != 0 && thrj+1 < xkolik ) + sArray[0*sizeSArray + thrj+1] = aux[ blockIdx.y*blockDim.y*dimX - dimX + blockIdx.x*blockDim.x - 1 + thrj+1 ]; else - sArray[ykolik][thrj+1] = std::numeric_limits< Real >::max(); + sArray[0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); + } + + if( i < dimX && j < dimY ) + { + sArray[(thrj+1)*sizeSArray + thri+1] = aux[ j*dimX + i ]; } + while( changed[ 0 ] ) + { + __syncthreads(); + + changed[ currentIndex] = false; + + //calculation of update cell + if( i < dimX && j < dimY ) + { + if( ! 
interfaceMap[ j * dimX + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, hx,hy); } } __syncthreads(); @@ -488,30 +650,36 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { if( currentIndex < 64 ) { - if( thri == 3 ) - { - if( blIdy != 0 && thrj+1 < xkolik && NE == 1 ) - sArray[0][thrj+1] = aux[ blIdy*blockDim.y*dimX - dimX + blIdx*blockDim.x - 1 + thrj+1 ]; - else - sArray[0][thrj+1] = std::numeric_limits< Real >::max(); - } - - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) - { - sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; - } - __syncthreads(); - - while( changed[ 0 ] ) - { + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; + } + } __syncthreads(); - - changed[ currentIndex] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) + if( currentIndex < 32 ) { - if( ! 
interfaceMap[ j * mesh.getDimensions().x() + i ] ) + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + if( thri == 0 && thrj == 0 && changed[ 0 ] ){ + BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] = 1; + } + /*if( thri==0 && thrj == 0 && blockIdx.x == 0 && blockIdx.y == 0 ) + { + for( int k = 15; k>-1; k-- ){ + for( int l = 0; l < 16; l++ ) + printf( "%f\t", sArray[k * 16 + l]); + printf( "\n"); + } + printf( "\n"); + }*/ + __syncthreads(); + } + if( i < dimX && j < dimY ) + helpFunc[ j * dimX + i ] = sArray[ ( thrj + 1 ) * sizeSArray + thri + 1 ]; + + } } #endif -- GitLab From a6cfb604446996a5f1296c45b8a5dc28b194ebf1 Mon Sep 17 00:00:00 2001 From: Fencl Date: Wed, 31 Oct 2018 06:44:59 +0100 Subject: [PATCH 14/20] Repair of last commit (error for - wihtout cuda): FIM method implemented for 2D GPU and FIM-FSM implemented for 2D CPU (parallel). 
--- .../tnlDirectEikonalMethodsBase_impl.h | 119 +++++++++--------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 95971c9b8..500d1bf03 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -11,6 +11,7 @@ #include #include "tnlFastSweepingMethod.h" +#include "tnlDirectEikonalMethodsBase.h" template< typename Real, typename Device, @@ -135,7 +136,7 @@ updateBlocks( InterfaceMapType interfaceMap, bool changed = false; - RealType *sArray; + Real *sArray; sArray = new Real[ sizeSArray * sizeSArray ]; if( sArray == nullptr ) std::cout << "Error while allocating memory for sArray." << std::endl; @@ -175,7 +176,7 @@ updateBlocks( InterfaceMapType interfaceMap, //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - pom = this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + pom = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); changed = changed || pom; } } @@ -195,7 +196,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); } } } @@ -213,7 +214,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! 
interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); } } } @@ -231,7 +232,7 @@ updateBlocks( InterfaceMapType interfaceMap, { if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - this->updateCell( sArray/*[i]*/, l+1, k+1, hx,hy); + this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx, hy, 1.0); } } } @@ -258,7 +259,7 @@ updateBlocks( InterfaceMapType interfaceMap, } //std::cout< +template< int sizeSArray > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, + const Real v ) +{ + const RealType value = sArray[ thrj * sizeSArray + thri ]; + RealType a, b, tmp = std::numeric_limits< RealType >::max(); + + b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], + sArray[ (thrj-1) * sizeSArray + thri ] ); + + a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], + sArray[ thrj * sizeSArray + thri-1 ] ); + + if( fabs( a ) == std::numeric_limits< RealType >::max() && + fabs( b ) == std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 
3 ] + pom[ 4 ] * pom[ 4 ] ); + sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrj * sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + + return false; +} #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -1133,58 +1185,7 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 } -template< typename Real, - typename Device, - typename Index > -template< int sizeSArray > -__cuda_callable__ -bool -tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: -updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real hy, - const Real v ) -{ - const RealType value = sArray[ thrj * sizeSArray + thri ]; - RealType a, b, tmp = std::numeric_limits< RealType >::max(); - - b = TNL::argAbsMin( sArray[ (thrj+1) * sizeSArray + thri ], - sArray[ (thrj-1) * sizeSArray + thri ] ); - - a = TNL::argAbsMin( sArray[ thrj * sizeSArray + thri+1 ], - sArray[ thrj * sizeSArray + thri-1 ] ); - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, std::numeric_limits< RealType >::max(), (RealType)hx, (RealType)hy, 0.0 }; - sortMinims( pom ); - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; - - - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrj * sizeSArray + thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); - sArray[ thrj * sizeSArray + thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ 
thrj * sizeSArray + thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - - return false; -} + template< typename Real, typename Device, -- GitLab From 39b4889c9c0205f550e334f646d1c244530d788d Mon Sep 17 00:00:00 2001 From: Fencl Date: Thu, 1 Nov 2018 16:26:36 +0100 Subject: [PATCH 15/20] Last repair of FIM for GPU. --- .../tnlDirectEikonalMethodsBase.h | 2 +- .../tnlDirectEikonalMethodsBase_impl.h | 72 ++++---- .../tnlFastSweepingMethod2D_impl.h | 165 ++++++++++-------- 3 files changed, 125 insertions(+), 114 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index cbb1a1ff6..ccbae8abe 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -148,7 +148,7 @@ __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, I template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 500d1bf03..5083544e2 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ 
b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -134,6 +134,7 @@ updateBlocks( InterfaceMapType interfaceMap, Real hy = mesh.getSpaceSteps().y(); bool changed = false; + BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; Real *sArray; @@ -143,53 +144,52 @@ updateBlocks( InterfaceMapType interfaceMap, for( int thri = 0; thri < sizeSArray; thri++ ){ for( int thrj = 0; thrj < sizeSArray; thrj++ ) - sArray/*[i]*/[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); + sArray[ thri * sizeSArray + thrj ] = std::numeric_limits< Real >::max(); } - BlockIterHost[ blIdy * numOfBlockx + blIdx ] = 0; for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) - sArray/*[i]*/[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; + sArray[ ( thrj+1 )* sizeSArray +xkolik] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX + xkolik ]; if( blIdx != 0 && thrj+1 < ykolik ) - sArray/*[i]*/[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; + sArray[(thrj+1)* sizeSArray] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + (thrj+1)*dimX ]; if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik ) - sArray/*[i]*/[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; + sArray[ykolik * sizeSArray + thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + ykolik*dimX + thrj+1 ]; if( blIdy != 0 && thrj+1 < xkolik ) - sArray/*[i]*/[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; + sArray[thrj+1] = aux[ blIdy*numThreadsPerBlock*dimX - dimX + blIdx*numThreadsPerBlock - 1 + thrj+1 ]; } for( int k = 0; k < 
numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) - sArray/*[i]*/[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; + sArray[(k+1) * sizeSArray + l+1] = aux[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ]; } - bool pom = false; + for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ){ if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ){ //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; if( ! interfaceMap[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] ) { - pom = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy); - changed = changed || pom; + changed = this->template updateCell< sizeSArray >( sArray, l+1, k+1, hx,hy) || changed; + } } } } /*aux.save( "aux-1pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ - + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ + for( int k = 0; k < numThreadsPerBlock; k++ ) for( int l = numThreadsPerBlock-1; l >-1; l-- ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) @@ -201,12 +201,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-2pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ for( int k 
= numThreadsPerBlock-1; k > -1; k-- ) for( int l = 0; l < numThreadsPerBlock; l++ ) { @@ -219,12 +219,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-3pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ for( int k = numThreadsPerBlock-1; k > -1; k-- ){ for( int l = numThreadsPerBlock-1; l >-1; l-- ) { @@ -238,12 +238,12 @@ updateBlocks( InterfaceMapType interfaceMap, } } /*aux.save( "aux-4pruch.tnl" ); - for( int k = 0; k < sizeSArray; k++ ){ - for( int l = 0; l < sizeSArray; l++ ) { - std::cout << sArray[ k * sizeSArray + l] << " "; - } - std::cout << std::endl; - }*/ + for( int k = 0; k < sizeSArray; k++ ){ + for( int l = 0; l < sizeSArray; l++ ) { + std::cout << sArray[ k * sizeSArray + l] << " "; + } + std::cout << std::endl; + }*/ if( changed ){ @@ -254,7 +254,7 @@ updateBlocks( InterfaceMapType interfaceMap, for( int k = 0; k < numThreadsPerBlock; k++ ){ for( int l = 0; l < numThreadsPerBlock; l++ ) { if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX ) - helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray/*[i]*/[ (k + 1)* sizeSArray + l + 1 ]; + helpFunc[ blIdy * numThreadsPerBlock * dimX + numThreadsPerBlock * blIdx + k*dimX + l ] = sArray[ (k + 1)* sizeSArray + l + 1 ]; //std::cout<< sArray[k+1][l+1]; } //std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ if( IsCalculationDone == 0 ){ IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; @@ -130,6 +131,7 @@ solve( const MeshPointer& mesh, } } numWhile++; + std::cout 
<<"numWhile = "<< numWhile <-1; j-- ){ for( int i = 0; i < numBlocksX; i++ ) @@ -146,7 +148,6 @@ solve( const MeshPointer& mesh, std::cout << std::endl; } std::cout << std::endl;*/ - //Reduction //std::cout<updateCell( aux, cell ); } - } + } //aux.save( "aux-1.tnl" ); @@ -261,12 +262,12 @@ solve( const MeshPointer& mesh, TNL_CHECK_CUDA_DEVICE; - /*TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; - BlockIterPom.setSize( numBlocksX * numBlocksY ); - BlockIterPom.setValue( 0 );*/ + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY ); + BlockIterPom.setValue( 0 ); /*TNL::Containers::Array< int, Devices::Host, IndexType > BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 1:0); @@ -284,9 +285,7 @@ solve( const MeshPointer& mesh, cudaMalloc((void**) &dBlock, nBlocks * sizeof( int ) );*/ - MeshFunctionPointer helpFunc1; - helpFunc1->setMesh(mesh); - + MeshFunctionPointer helpFunc1( mesh ); MeshFunctionPointer helpFunc( mesh ); helpFunc1 = auxPtr; @@ -301,83 +300,94 @@ solve( const MeshPointer& mesh, /** HERE IS CHESS METHOD **/ /*auxPtr = helpFunc; + + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; + + oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; + + CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice, + oddEvenBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + auxPtr = helpFunc; + + oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; + + CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + BlockIterD = dBlock.getElement( 0 );*/ + + /**------------------------------------------------------------------------------------------------*/ + + + /** HERE IS FIM **/ + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + TNL_CHECK_CUDA_DEVICE; + + //int pocBloku = 0; + Devices::Cuda::synchronizeDevice(); CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), - auxPtr.template getData< Device>(), + auxPtr.template modifyData< Device>(), helpFunc.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); + BlockIterDevice ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - auxPtr = helpFunc; - oddEvenBlock= (oddEvenBlock == 0) ? 
1: 0; + //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; + //BlockIterPom1 = BlockIterDevice; + ///for( int i =0; i< numBlocksX; i++ ){ + // for( int j = 0; j < numBlocksY; j++ ) + // { + // std::cout << BlockIterPom1[j*numBlocksX + i]; + // } + // std::cout << std::endl; + //} + //std::cout << std::endl; - CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template getData< Device>(), - helpFunc.template modifyData< Device>(), - BlockIterDevice, - oddEvenBlock ); + GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - auxPtr = helpFunc; + BlockIterDevice = BlockIterPom; + + //std::cout<< "Probehlo" << std::endl; + + //TNL::swap( auxPtr, helpFunc ); - oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; - BlockIterD = dBlock.getElement( 0 );*/ - /**------------------------------------------------------------------------------------------------*/ + BlockIterD = dBlock.getElement( 0 ); + //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; - /** HERE IS FIM **/ - - helpFunc1 = auxPtr; - auxPtr = helpFunc; - helpFunc = helpFunc1; - - //int pocBloku = 0; - Devices::Cuda::synchronizeDevice(); - CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - helpFunc.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //std::cout << "Pocet aktivnich bloku = " << pocBloku << std::endl; - - GetNeighbours<<< nBlocksNeigh, 1024 >>>( 
BlockIterDevice, numBlocksX, numBlocksY ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //std::cout<< "Probehlo" << std::endl; - - //TNL::swap( auxPtr, helpFunc ); - - - CudaParallelReduc<<< nBlocks , 1024 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - TNL_CHECK_CUDA_DEVICE; - - - BlockIterD = dBlock.getElement( 0 ); - //cudaMemcpy( &BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - /**-----------------------------------------------------------------------------------------------------------*/ /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ @@ -392,7 +402,6 @@ solve( const MeshPointer& mesh, cudaFree( dBlock ); delete BlockIter;*/ cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; aux = *auxPtr; @@ -410,7 +419,7 @@ solve( const MeshPointer& mesh, template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - /*TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom,*/ int numBlockX, int numBlockY ) + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ) { int i = blockIdx.x * 1024 + threadIdx.x; @@ -430,7 +439,7 @@ __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index pom = 1;//BlockIterPom[ i ] = 1; } - BlockIterDevice[ i ] = pom;//BlockIterPom[ i ]; + BlockIterPom[ i ] = pom;//BlockIterPom[ i ]; } } @@ -514,14 +523,16 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; /** FOR CHESS METHOD */ - if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) - { - /**-----------------------------------------*/ - + //if( (blockIdx.y%2 + blockIdx.x) % 2 == oddEvenBlock ) 
+ //{ + /**------------------------------------------*/ + + + /** FOR FIM METHOD */ - /** FOR FIM METHOD */ - /*if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x] ) - {*/ + if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] ) + { + __syncthreads(); /**-----------------------------------------*/ const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); __shared__ volatile int dimX; -- GitLab From e3c970e1396fadc6b7fb1e97f0cf734037a19fd2 Mon Sep 17 00:00:00 2001 From: Fencl Date: Mon, 5 Nov 2018 14:43:21 +0100 Subject: [PATCH 16/20] FIM implemented in 3D --- .../tnlDirectEikonalMethodsBase.h | 10 +- .../tnlFastSweepingMethod2D_impl.h | 16 +- .../tnlFastSweepingMethod3D_impl.h | 881 +++++++++--------- 3 files changed, 478 insertions(+), 429 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index ccbae8abe..7d990c1bb 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -160,11 +160,17 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); -template < typename Real, typename Device, typename Index > +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + const Functions::MeshFunction< Meshes::Grid< 3, Real, 
Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + +template < typename Index > +__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, + int numBlockX, int numBlockY, int numBlockZ ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 546cfe9aa..b823fec03 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -85,7 +85,7 @@ solve( const MeshPointer& mesh, { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 1024; + int numThreadsPerBlock = 16; int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 
1:0); @@ -115,13 +115,13 @@ solve( const MeshPointer& mesh, } std::cout<template updateBlocks< 1026 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ @@ -394,9 +394,7 @@ solve( const MeshPointer& mesh, numIter ++; } if( numIter == 1 ){ - helpFunc1 = auxPtr; auxPtr = helpFunc; - helpFunc = helpFunc1; } /*cudaFree( BlockIterDevice ); cudaFree( dBlock ); @@ -535,10 +533,10 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< __syncthreads(); /**-----------------------------------------*/ const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); - __shared__ volatile int dimX; - __shared__ volatile int dimY; - __shared__ volatile Real hx; - __shared__ volatile Real hy; + __shared__ int dimX; + __shared__ int dimY; + __shared__ Real hx; + __shared__ Real hy; if( thri==0 && thrj == 0) { dimX = mesh.getDimensions().x(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 4daf9fc92..65aba5bf5 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -16,511 +16,556 @@ #include "tnlFastSweepingMethod.h" template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 1 ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename 
Index, + typename Anisotropy > const Index& FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: getMaxIterations() const { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: setMaxIterations( const IndexType& maxIterations ) { - + } template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > void FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ) + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ) { - MeshFunctionPointer auxPtr; - InterfaceMapPointer interfaceMapPtr; - auxPtr->setMesh( mesh ); - interfaceMapPtr->setMesh( mesh ); - std::cout << "Initiating the interface cells ..." << std::endl; - BaseType::initInterface( u, auxPtr, interfaceMapPtr ); + MeshFunctionPointer auxPtr; + InterfaceMapPointer interfaceMapPtr; + auxPtr->setMesh( mesh ); + interfaceMapPtr->setMesh( mesh ); + std::cout << "Initiating the interface cells ..." 
<< std::endl; + BaseType::initInterface( u, auxPtr, interfaceMapPtr ); #ifdef HAVE_CUDA - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #endif - auxPtr->save( "aux-ini.tnl" ); - - typename MeshType::Cell cell( *mesh ); - - IndexType iteration( 0 ); - MeshFunctionType aux = *auxPtr; - InterfaceMapType interfaceMap = * interfaceMapPtr; - while( iteration < this->maxIterations ) + auxPtr->save( "aux-ini.tnl" ); + + typename MeshType::Cell cell( *mesh ); + + IndexType iteration( 0 ); + MeshFunctionType aux = *auxPtr; + InterfaceMapType interfaceMap = * interfaceMapPtr; + while( iteration < this->maxIterations ) + { + if( std::is_same< DeviceType, Devices::Host >::value ) { - if( std::is_same< DeviceType, Devices::Host >::value ) + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); - - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } } - if( std::is_same< DeviceType, Devices::Cuda >::value ) + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) { - // TODO: CUDA code -#ifdef HAVE_CUDA - const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize ); - if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) - std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; - dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); - dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - - tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; - - - int BlockIterD = 1; - - TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; - BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); - BlockIterDevice.setValue( 1 ); - /*int *BlockIterDevice; - cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ - int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 
1:0); - - TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; - dBlock.setSize( nBlocks ); - dBlock.setValue( 0 ); - /*int *dBlock; - cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ - - while( BlockIterD ) + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) { - CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, - interfaceMapPtr.template getData< Device >(), - auxPtr.template modifyData< Device>(), - BlockIterDevice ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); - - /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) - BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ - + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); } - //cudaFree( BlockIterDevice ); - //cudaFree( dBlock ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - aux = *auxPtr; - interfaceMap = *interfaceMapPtr; -#endif + } } - - //aux.save( "aux-8.tnl" ); - iteration++; + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { + // TODO: CUDA code +#ifdef HAVE_CUDA + const int cudaBlockSize( 8 ); + int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize ); + if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) + std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; + dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); + dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); + + tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; + + + int BlockIterD = 1; + + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterDevice; + BlockIterDevice.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterDevice.setValue( 1 ); + TNL::Containers::Array< int, Devices::Cuda, IndexType > BlockIterPom; + BlockIterPom.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterPom.setValue( 0 ); + /*int *BlockIterDevice; + cudaMalloc(&BlockIterDevice, ( numBlocksX * numBlocksY * numBlocksZ ) * sizeof( int ) );*/ + int nBlocks = ( numBlocksX * numBlocksY * numBlocksZ )/512 + ((( numBlocksX * numBlocksY * numBlocksZ )%512 != 0) ? 1:0); - } - aux.save("aux-final.tnl"); + TNL::Containers::Array< int, Devices::Cuda, IndexType > dBlock; + dBlock.setSize( nBlocks ); + dBlock.setValue( 0 ); + + int nBlocksNeigh = ( numBlocksX * numBlocksY * numBlocksZ )/1024 + ((( numBlocksX * numBlocksY * numBlocksZ )%1024 != 0) ? 
1:0); + /*int *dBlock; + cudaMalloc(&dBlock, nBlocks * sizeof( int ) );*/ + MeshFunctionPointer helpFunc1( mesh ); + MeshFunctionPointer helpFunc( mesh ); + + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + int numIter = 0; + + while( BlockIterD ) + { + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + TNL_CHECK_CUDA_DEVICE; + + CudaUpdateCellCaller< 10 ><<< gridSize, blockSize >>>( ptr, + interfaceMapPtr.template getData< Device >(), + auxPtr.template getData< Device>(), + helpFunc.template modifyData< Device>(), + BlockIterDevice ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + GetNeighbours3D<<< nBlocksNeigh, 1024 >>>( BlockIterDevice, BlockIterPom, numBlocksX, numBlocksY, numBlocksZ ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + BlockIterDevice = BlockIterPom; + + CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY * numBlocksZ ) ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + + CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); + numIter++; + /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) + BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ + + } + if( numIter == 1 ){ + auxPtr = helpFunc; + } + //cudaFree( BlockIterDevice ); + //cudaFree( dBlock ); + cudaDeviceSynchronize(); + TNL_CHECK_CUDA_DEVICE; + aux = *auxPtr; + interfaceMap = *interfaceMapPtr; +#endif + } + + //aux.save( "aux-8.tnl" ); + iteration++; + + } + aux.save("aux-final.tnl"); } #ifdef HAVE_CUDA -template < typename Real, typename Device, typename Index > +template < typename Index > +__global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, + int numBlockX, int numBlockY, int numBlockZ ) +{ + int i = blockIdx.x * 1024 + threadIdx.x; + 
+ if( i < numBlockX * numBlockY * numBlockZ ) + { + int pom = 0;//BlockIterPom[ i ] = 0; + int m=0, l=0, k=0; + l = i/( numBlockX * numBlockY ); + k = (i-l*numBlockX * numBlockY )/(numBlockX ); + m = (i-l*numBlockX * numBlockY )%( numBlockX ); + if( m > 0 && BlockIterDevice[ i - 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterDevice[ i + 1 ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterDevice[ i - numBlockX ] ){ + pom = 1;// BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterDevice[ i + numBlockX ] ){ + pom = 1;//BlockIterPom[ i ] = 1; + }else if( l > 0 && BlockIterDevice[ i - numBlockX*numBlockY ] ){ + pom = 1; + }else if( l < numBlockZ-1 && BlockIterDevice[ i + numBlockX*numBlockY ] ){ + pom = 1; + } + + BlockIterPom[ i ] = pom;//BlockIterPom[ i ]; + } +} + +template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ) { - int thri = threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; - int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - int k = blockDim.z*blockIdx.z + threadIdx.z; - int currentIndex = thrk * blockDim.x * blockDim.y + thrj * blockDim.x + thri; + int thri = 
threadIdx.x; int thrj = threadIdx.y; int thrk = threadIdx.z; + int blIdx = blockIdx.x; int blIdy = blockIdx.y; int blIdz = blockIdx.z; + int i = threadIdx.x + blockDim.x*blockIdx.x; + int j = blockDim.y*blockIdx.y + threadIdx.y; + int k = blockDim.z*blockIdx.z + threadIdx.z; + int currentIndex = thrk * blockDim.x * blockDim.y + thrj * blockDim.x + thri; + + if( BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] ) + { + __syncthreads(); - __shared__ volatile bool changed[8*8*8]; - changed[ currentIndex ] = false; + __shared__ volatile bool changed[ (sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)]; + changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 && thrk == 0 ) - changed[ 0 ] = true; + changed[ 0 ] = true; const Meshes::Grid< 3, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); - __shared__ Real hx; - __shared__ Real hy; - __shared__ Real hz; + __shared__ Real hx; __shared__ int dimX; + __shared__ Real hy; __shared__ int dimY; + __shared__ Real hz; __shared__ int dimZ; + if( thrj == 1 && thri == 1 && thrk == 1 ) { - hx = mesh.getSpaceSteps().x(); - hy = mesh.getSpaceSteps().y(); - hz = mesh.getSpaceSteps().z(); + hx = mesh.getSpaceSteps().x(); + hy = mesh.getSpaceSteps().y(); + hz = mesh.getSpaceSteps().z(); + dimX = mesh.getDimensions().x(); + dimY = mesh.getDimensions().y(); + dimZ = mesh.getDimensions().z(); + BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0; } - __shared__ volatile Real sArray[10][10][10]; - sArray[thrk][thrj][thri] = std::numeric_limits< Real >::max(); - if(thri == 0 ) - { - sArray[8][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); - sArray[thrk+1][thrj+1][8] = std::numeric_limits< Real >::max(); - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); - sArray[thrj+1][8][thrk+1] = std::numeric_limits< Real >::max(); - 
sArray[thrj+1][9][thrk+1] = std::numeric_limits< Real >::max(); - } - + __shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray]; + sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max(); + //filling sArray edges - int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); - int dimZ = mesh.getDimensions().z(); - __shared__ volatile int numOfBlockx; - __shared__ volatile int numOfBlocky; - __shared__ volatile int numOfBlockz; - __shared__ int xkolik; - __shared__ int ykolik; - __shared__ int zkolik; - if( thri == 0 && thrj == 0 && thrk == 0 ) - { - xkolik = blockDim.x + 1; - ykolik = blockDim.y + 1; - zkolik = blockDim.z + 1; - numOfBlocky = dimY/blockDim.y + ((dimY%blockDim.y != 0) ? 1:0); - numOfBlockx = dimX/blockDim.x + ((dimX%blockDim.x != 0) ? 1:0); - numOfBlockz = dimZ/blockDim.z + ((dimZ%blockDim.z != 0) ? 1:0); - - if( numOfBlockx - 1 == blIdx ) - xkolik = dimX - (blIdx)*blockDim.x+1; - - if( numOfBlocky -1 == blIdy ) - ykolik = dimY - (blIdy)*blockDim.y+1; - if( numOfBlockz-1 == blIdz ) - zkolik = dimZ - (blIdz)*blockDim.z+1; - - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 0; - } + int numOfBlockx; + int numOfBlocky; + int numOfBlockz; + int xkolik; + int ykolik; + int zkolik; + xkolik = blockDim.x + 1; + ykolik = blockDim.y + 1; + zkolik = blockDim.z + 1; + numOfBlockx = gridDim.x; + numOfBlocky = gridDim.y; + numOfBlockz = gridDim.z; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*blockDim.x+1; + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*blockDim.y+1; + if( numOfBlockz-1 == blIdz ) + zkolik = dimZ - (blIdz)*blockDim.z+1; __syncthreads(); if( thri == 0 ) { - if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; - else - sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); + if( blIdx != 0 && 
thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; + else + sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { - if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; - else - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); + if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; + else + sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { - if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; - else - sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); + if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; + else + sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 3 ) { - if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; - else - sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); + if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; + else + 
sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 4 ) { - if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; - else - sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; + else + sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); } if( thri == 5 ) { - if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; - else - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; + else + sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); } - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < mesh.getDimensions().z() ) + if( i < dimX && j < dimY && k < dimZ ) { - sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; + sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; } - __shared__ volatile int loopcounter; - loopcounter = 0; __syncthreads(); while( changed[ 0 ] ) { - __syncthreads(); - - changed[ currentIndex ] = false; - - //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) - { - if( ! 
interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) - { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); - } - } - __syncthreads(); - - //pyramid reduction - if( blockDim.x*blockDim.y*blockDim.z == 1024 ) - { - if( currentIndex < 512 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; - } - } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 512 ) + __syncthreads(); + + changed[ currentIndex ] = false; + + //calculation of update cell + if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) + { + if( ! interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) { - if( currentIndex < 256 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; - } + changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 256 ) + } + __syncthreads(); + + //pyramid reduction + if( blockDim.x*blockDim.y*blockDim.z == 1024 ) + { + if( currentIndex < 512 ) { - if( currentIndex < 128 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; - } + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 512 ]; } - __syncthreads(); - if( blockDim.x*blockDim.y*blockDim.z >= 128 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 512 ) + { + if( currentIndex < 256 ) { - if( currentIndex < 64 ) - { - changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; - } + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 256 ]; } - __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 256 ) + { + if( currentIndex < 128 ) { - if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; - if( 
currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; - if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; - if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; - if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 2 ]; - if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 128 ]; } - __syncthreads(); - - /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) - { - for(int m = 0; m < 8; m++){ - for(int n = 0; n<8; n++){ - for(int b=0; b<8; b++) - printf(" %i ", changed[m*64 + n*8 + b]); - printf("\n"); - } - printf("\n \n"); - } - }*/ - if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) + } + __syncthreads(); + if( blockDim.x*blockDim.y*blockDim.z >= 128 ) + { + if( currentIndex < 64 ) { - //loopcounter++; - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 64 ]; } - __syncthreads(); - /*if(thri == 0 && thrj==0 && thrk==0) - printf("%i \n",loopcounter); - if(loopcounter == 500) - break;*/ + } + __syncthreads(); + if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + { + if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; + if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; + if( currentIndex < 8 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 8 ]; + if( currentIndex < 4 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 4 ]; + if( currentIndex < 2 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ 
currentIndex + 2 ]; + if( currentIndex < 1 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 1 ]; + } + __syncthreads(); + + /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) + { + for(int m = 0; m < 8; m++){ + for(int n = 0; n<8; n++){ + for(int b=0; b<8; b++) + printf(" %i ", changed[m*64 + n*8 + b]); + printf("\n"); + } + printf("\n \n"); + } + }*/ + if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) + { + BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + } + __syncthreads(); } - - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ && (!interfaceMap[ k*dimX*dimY+j * mesh.getDimensions().x() + i ]) ) - aux[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; -} + + if( i < dimX && j < dimY && k < dimZ ) + helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; + } +} #endif -- GitLab From 4cefa039f6ffa3b5e30c4249cb3e1f60ba860c3b Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 15 Nov 2018 13:24:51 +0100 Subject: [PATCH 17/20] Enabled computations with single precision. --- .../Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h index f8f9187fa..a2a1d7372 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/MainBuildConfig.h @@ -23,7 +23,7 @@ namespace Solvers { /**** * Turn off support for float and long double. 
*/ -template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = false }; }; +template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, float > { enum { enabled = true }; }; template<> struct ConfigTagReal< HamiltonJacobiBuildConfig, long double > { enum { enabled = false }; }; /**** -- GitLab From 852534a88eab1e78b21d3fd41665dc7ca556c878 Mon Sep 17 00:00:00 2001 From: Fencl Date: Fri, 16 Nov 2018 12:03:40 +0100 Subject: [PATCH 18/20] 3D FSM+FIM implemented 2D FSM+FIM method pickes size of rectangular block depending on number of blocks --- .../tnlDirectEikonalMethodsBase.h | 214 ++++---- .../tnlDirectEikonalMethodsBase_impl.h | 519 +++++++++++++++--- .../hamilton-jacobi/tnlFastSweepingMethod.h | 222 ++++---- .../tnlFastSweepingMethod2D_impl.h | 74 ++- .../tnlFastSweepingMethod3D_impl.h | 455 +++++++++------ 5 files changed, 1004 insertions(+), 480 deletions(-) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 7d990c1bb..f712ce2cc 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -19,102 +19,112 @@ class tnlDirectEikonalMethodsBase }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< 
InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - __cuda_callable__ bool updateCell( volatile Real sArray[18], - int thri, const Real h, - const Real velocity = 1.0 ); + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 1, bool > InterfaceMapType; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + __cuda_callable__ bool updateCell( volatile Real sArray[18], + int thri, const Real h, + const Real velocity = 1.0 ); }; template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - public: - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; - typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const 
MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0 ); - - template< int sizeSArray > - __cuda_callable__ bool updateCell( volatile Real *sArray, - int thri, int thrj, const Real hx, const Real hy, - const Real velocity = 1.0 ); - - template< int sizeSArray > - void updateBlocks( InterfaceMapType interfaceMap, - MeshFunctionType aux, - MeshFunctionType helpFunc, - ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); - - void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); + public: + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 2, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0 ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell( volatile Real *sArray, + int thri, int thrj, const Real hx, const Real hy, + const Real velocity = 1.0 ); + + template< int sizeSArray > + void updateBlocks( InterfaceMapType interfaceMap, + MeshFunctionType aux, + MeshFunctionType helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY ); }; 
template< typename Real, - typename Device, - typename Index > + typename Device, + typename Index > class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - public: - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DevcieType; - typedef Index IndexType; - typedef Functions::MeshFunction< MeshType > MeshFunctionType; - typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; - using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; - using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; - - void initInterface( const MeshFunctionPointer& input, - MeshFunctionPointer& output, - InterfaceMapPointer& interfaceMap ); - - template< typename MeshEntity > - __cuda_callable__ void updateCell( MeshFunctionType& u, - const MeshEntity& cell, - const RealType velocity = 1.0); - - __cuda_callable__ bool updateCell( volatile Real sArray[10][10][10], - int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, - const Real velocity = 1.0 ); + public: + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DevcieType; + typedef Index IndexType; + typedef Functions::MeshFunction< MeshType > MeshFunctionType; + typedef Functions::MeshFunction< MeshType, 3, bool > InterfaceMapType; + typedef TNL::Containers::Array< int, Device, IndexType > ArrayContainer; + using MeshFunctionPointer = Pointers::SharedPointer< MeshFunctionType >; + using InterfaceMapPointer = Pointers::SharedPointer< InterfaceMapType >; + + void initInterface( const MeshFunctionPointer& input, + MeshFunctionPointer& output, + InterfaceMapPointer& interfaceMap ); + + template< typename MeshEntity > + __cuda_callable__ void updateCell( MeshFunctionType& u, + const MeshEntity& cell, + const RealType velocity = 1.0); + + template< int sizeSArray > + void updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + 
MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ); + + void getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ); + + template< int sizeSArray > + __cuda_callable__ bool updateCell3D( volatile Real *sArray, + int thri, int thrj, int thrk, const Real hx, const Real hy, const Real hz, + const Real velocity = 1.0 ); }; template < typename T1, typename T2 > @@ -126,46 +136,46 @@ __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, - Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, - bool *BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index >, 1, bool >& interfaceMap, + Functions::MeshFunction< Meshes::Grid< 1, Real, Device, Index > >& aux, + bool *BlockIterDevice ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, - 
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, int oddEvenBlock =0); template < typename Index > __global__ void CudaParallelReduc( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); + TNL::Containers::Array< int, Devices::Cuda, Index > dBlock, int nBlocks ); template < typename Index > __global__ void GetNeighbours( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterPom, int numBlockX, int numBlockY ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); + 
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap ); template < int sizeSArray, typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, - const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, - Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, - TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap, + const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux, + Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& helpFunc, + TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice ); template < typename Index > __global__ void GetNeighbours3D( TNL::Containers::Array< int, Devices::Cuda, Index > BlockIterDevice, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index 5083544e2..8f7937541 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -148,6 +148,7 @@ updateBlocks( InterfaceMapType interfaceMap, } + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); for( int thrj = 0; thrj < numThreadsPerBlock + 1; thrj++ ) { if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik ) @@ -263,6 +264,370 @@ updateBlocks( InterfaceMapType interfaceMap, } } } +template< typename Real, + typename Device, + typename Index > +template< int 
sizeSArray > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateBlocks( const InterfaceMapType interfaceMap, + const MeshFunctionType aux, + MeshFunctionType& helpFunc, + ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ ) +{ +//#pragma omp parallel for schedule( dynamic ) + for( int i = 0; i < BlockIterHost.getSize(); i++ ) + { + if( BlockIterHost[ i ] ) + { + MeshType mesh = interfaceMap.template getMesh< Devices::Host >(); + + int dimX = mesh.getDimensions().x(); int dimY = mesh.getDimensions().y(); + int dimZ = mesh.getDimensions().z(); + //std::cout << "dimX = " << dimX << " ,dimY = " << dimY << std::endl; + int numOfBlocky = dimY/numThreadsPerBlock + ((dimY%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockx = dimX/numThreadsPerBlock + ((dimX%numThreadsPerBlock != 0) ? 1:0); + int numOfBlockz = dimZ/numThreadsPerBlock + ((dimZ%numThreadsPerBlock != 0) ? 1:0); + //std::cout << "numOfBlockx = " << numOfBlockx << " ,numOfBlocky = " << numOfBlocky << std::endl; + int xkolik = numThreadsPerBlock + 1; + int ykolik = numThreadsPerBlock + 1; + int zkolik = numThreadsPerBlock + 1; + + + int blIdz = i/( numOfBlockx * numOfBlocky ); + int blIdy = (i-blIdz*numOfBlockx * numOfBlocky )/(numOfBlockx ); + int blIdx = (i-blIdz*numOfBlockx * numOfBlocky )%( numOfBlockx ); + //std::cout << "blIdx = " << blIdx << " ,blIdy = " << blIdy << std::endl; + + if( numOfBlockx - 1 == blIdx ) + xkolik = dimX - (blIdx)*numThreadsPerBlock+1; + if( numOfBlocky -1 == blIdy ) + ykolik = dimY - (blIdy)*numThreadsPerBlock+1; + if( numOfBlockz-1 == blIdz ) + zkolik = dimZ - (blIdz)*numThreadsPerBlock+1; + //std::cout << "xkolik = " << xkolik << " ,ykolik = " << ykolik << std::endl; + + + /*bool changed[numThreadsPerBlock*numThreadsPerBlock]; + changed[ 0 ] = 1;*/ + Real hx = mesh.getSpaceSteps().x(); + Real hy = mesh.getSpaceSteps().y(); + Real hz = mesh.getSpaceSteps().z(); + + bool changed = false; + BlockIterHost[ i ] = 0; + + + Real 
*sArray; + sArray = new Real[ sizeSArray * sizeSArray * sizeSArray ]; + if( sArray == nullptr ) + std::cout << "Error while allocating memory for sArray." << std::endl; + + for( int k = 0; k < sizeSArray; k++ ) + for( int l = 0; l < sizeSArray; l++ ) + for( int m = 0; m < sizeSArray; m++ ){ + sArray[ m * sizeSArray * sizeSArray + k * sizeSArray + l ] = std::numeric_limits< Real >::max(); + } + + + for( int thrk = 0; thrk < numThreadsPerBlock; thrk++ ) + for( int thrj = 0; thrj < numThreadsPerBlock; thrj++ ) + { + if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX -1 + thrk*dimX*dimY ]; + + if( dimX > (blIdx+1) * numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy *numThreadsPerBlock*dimX+ blIdx*numThreadsPerBlock + numThreadsPerBlock + thrj * dimX + thrk*dimX*dimY ]; + + if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX + thrj + thrk*dimX*dimY ]; + + if( dimY > (blIdy+1) * numThreadsPerBlock && thrj+1 < xkolik && thrk+1 < zkolik ) + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + (blIdy+1) * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj + thrk*dimX*dimY ]; + + if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = + aux[ blIdz*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock - dimX * dimY + thrj * dimX + thrk ]; + + if( dimZ > (blIdz+1) * 
numThreadsPerBlock && thrj+1 < ykolik && thrk+1 < xkolik ) + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = + aux[ (blIdz+1)*numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock*dimX + blIdx*numThreadsPerBlock + thrj * dimX + thrk ]; + } + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + sArray[(m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1] = + aux[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ]; + } + } + } + /*string s; + int numWhile = 0; + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + //std::cout << "proslo i = " << k * numThreadsPerBlock + l << std::endl; + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + //printf("In with point m = %d, k = %d, l = %d\n", m, k, l); + changed = this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz) || changed; + + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s ); + */ + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = 0; l template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray 
+ (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + for( int m = 0; m < numThreadsPerBlock; m++ ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + + for( int m = numThreadsPerBlock-1; m >-1; m-- ){ + for( int k = numThreadsPerBlock-1; k > -1; k-- ){ + for( int l = numThreadsPerBlock-1; l >-1; l-- ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + { + if( ! 
interfaceMap[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] ) + { + this->template updateCell3D< sizeSArray >( sArray, l+1, k+1, m+1, hx,hy,hz); + } + } + } + } + } + /*for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) + for( int m = 0; m < numThreadsPerBlock; m++ ) + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ) + helpFunc[ m*dimX*dimY + k*dimX + l ] = sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + } + numWhile++; + s = "helpFunc-"+ std::to_string(numWhile) + ".tnl"; + helpFunc.save( s );*/ + + if( changed ){ + BlockIterHost[ i ] = 1; + } + + + for( int k = 0; k < numThreadsPerBlock; k++ ){ + for( int l = 0; l < numThreadsPerBlock; l++ ) { + for( int m = 0; m < numThreadsPerBlock; m++ ){ + if( blIdy * numThreadsPerBlock + k < dimY && blIdx * numThreadsPerBlock + l < dimX && blIdz * numThreadsPerBlock + m < dimZ ){ + helpFunc[ blIdz * numThreadsPerBlock * dimX * dimY + blIdy * numThreadsPerBlock * dimX + blIdx*numThreadsPerBlock + m*dimX*dimY + k*dimX + l ] = + sArray[ (m+1) * sizeSArray * sizeSArray + (k+1) *sizeSArray + l+1 ]; + //std::cout << helpFunc[ m*dimX*dimY + k*dimX + l ] << " "; + } + } + //std::cout << std::endl; + } + //std::cout << std::endl; + } + //helpFunc.save( "helpF.tnl"); + delete []sArray; + } + } +} +template< typename Real, + typename Device, + typename Index > +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +getNeighbours( ArrayContainer BlockIterHost, int numBlockX, int numBlockY, int numBlockZ ) +{ /* Recomputes the per-block flags: block i (split below into x/y/z block indices m, k, l) gets 1 when at least one of its six face-adjacent blocks is currently flagged, otherwise 0; the block's own previous flag is not consulted. */ + int* BlockIterPom = new int [ numBlockX * numBlockY * numBlockZ ]; /* scratch copy so every test reads the flags as they were on entry */ + + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterPom[ i ] = 0; + + int m=0, l=0, k=0; + l = i/( numBlockX * numBlockY ); + k = (i-l*numBlockX * numBlockY )/(numBlockX ); + m = 
(i-l*numBlockX * numBlockY )%( numBlockX ); + + if( m > 0 && BlockIterHost[ i - 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( m < numBlockX -1 && BlockIterHost[ i + 1 ] ){ + BlockIterPom[ i ] = 1; + }else if( k > 0 && BlockIterHost[ i - numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( k < numBlockY -1 && BlockIterHost[ i + numBlockX ] ){ + BlockIterPom[ i ] = 1; + }else if( l > 0 && BlockIterHost[ i - numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + }else if( l < numBlockZ-1 && BlockIterHost[ i + numBlockX*numBlockY ] ){ + BlockIterPom[ i ] = 1; + } + } + for( int i = 0; i< BlockIterHost.getSize(); i++) + { + BlockIterHost[ i ] = BlockIterPom[ i ]; + } + delete[] BlockIterPom; /* the scratch array was previously leaked on every call */ +} + template< typename Real, typename Device, @@ -619,8 +984,8 @@ initInterface( const MeshFunctionPointer& _input, { cell.refresh(); output[ cell.getIndex() ] = - input( cell ) > 0 ? std::numeric_limits< RealType >::max() : - - std::numeric_limits< RealType >::max(); + input( cell ) > 0 ? 10://std::numeric_limits< RealType >::max() : + -10;//- std::numeric_limits< RealType >::max(); interfaceMap[ cell.getIndex() ] = false; } @@ -967,6 +1332,82 @@ updateCell( volatile Real *sArray, int thri, int thrj, const Real hx, const Real return false; } +template< typename Real, + typename Device, + typename Index > +template< int sizeSArray > +__cuda_callable__ +bool +tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: +updateCell3D( volatile Real *sArray, int thri, int thrj, int thrk, + const Real hx, const Real hy, const Real hz, const Real v ) +{ + const RealType value = sArray[thrk *sizeSArray * sizeSArray + thrj * sizeSArray + thri]; + + RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); + + c = TNL::argAbsMin( sArray[ (thrk+1)* sizeSArray*sizeSArray + thrj * sizeSArray + thri ], + sArray[ (thrk-1) * sizeSArray *sizeSArray + thrj* sizeSArray + thri ] ); + + b = TNL::argAbsMin( sArray[ thrk* sizeSArray*sizeSArray + (thrj+1) * sizeSArray + thri ], + sArray[ thrk* sizeSArray * sizeSArray 
+ (thrj-1)* sizeSArray +thri ] ); + + a = TNL::argAbsMin( sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri+1 ], + sArray[ thrk* sizeSArray * sizeSArray + thrj* sizeSArray +thri-1 ] ); + + /*if( thrk == 8 ) + printf("Calculating a = %f, b = %f, c = %f\n" , a, b, c );*/ + + if( fabs( a ) == 10&& //std::numeric_limits< RealType >::max() && + fabs( b ) == 10&&//std::numeric_limits< RealType >::max() && + fabs( c ) == 10)//std::numeric_limits< RealType >::max() ) + return false; + + RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; + + sortMinims( pom ); + + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + if( fabs( tmp ) < fabs( pom[ 2 ]) ) + { + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + else + { + tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + + TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - + hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - + hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); + sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ] = argAbsMin( 
value, tmp ); + tmp = value - sArray[ thrk* sizeSArray* sizeSArray + thrj* sizeSArray + thri ]; + if ( fabs( tmp ) > 0.001*hx ) + return true; + else + return false; + } + } + + return false; +} #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > @@ -1215,78 +1656,4 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) else return false; } - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool -tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >:: -updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, - const Real hx, const Real hy, const Real hz, const Real v ) -{ - const RealType value = sArray[thrk][thrj][thri]; - //std::cout << value << std::endl; - RealType a, b, c, tmp = std::numeric_limits< RealType >::max(); - - c = TNL::argAbsMin( sArray[ thrk+1 ][ thrj ][ thri ], - sArray[ thrk-1 ][ thrj ][ thri ] ); - - b = TNL::argAbsMin( sArray[ thrk ][ thrj+1 ][ thri ], - sArray[ thrk ][ thrj-1 ][ thri ] ); - - a = TNL::argAbsMin( sArray[ thrk ][ thrj ][ thri+1 ], - sArray[ thrk ][ thrj ][ thri-1 ] ); - - - if( fabs( a ) == std::numeric_limits< RealType >::max() && - fabs( b ) == std::numeric_limits< RealType >::max() && - fabs( c ) == std::numeric_limits< RealType >::max() ) - return false; - - RealType pom[6] = { a, b, c, (RealType)hx, (RealType)hy, (RealType)hz}; - - sortMinims( pom ); - - tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]; - if( fabs( tmp ) < fabs( pom[ 1 ] ) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + - TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 
] * pom[ 4 ] ); - if( fabs( tmp ) < fabs( pom[ 2 ]) ) - { - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - else - { - tmp = ( hy * hy * hz * hz * a + hx * hx * hz * hz * b + hx * hx * hy * hy * c + - TNL::sign( value ) * hx * hy * hz * TNL::sqrt( ( hx * hx * hz * hz + hy * hy * hz * hz + hx * hx * hy * hy)/( v * v ) - - hz * hz * ( a - b ) * ( a - b ) - hy * hy * ( a - c ) * ( a - c ) - - hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); - sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); - tmp = value - sArray[ thrk ][ thrj ][ thri ]; - if ( fabs( tmp ) > 0.001*hx ) - return true; - else - return false; - } - } - - return false; -} #endif diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index 60c690e06..57b1886e8 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -1,9 +1,9 @@ /*************************************************************************** - FastSweepingMethod.h - description - ------------------- - begin : Jul 14, 2016 - copyright : (C) 2017 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz + FastSweepingMethod.h - description + ------------------- + begin : Jul 14, 2016 + copyright : (C) 2017 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ @@ -17,132 +17,134 @@ template< typename Mesh, - typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > + typename Anisotropy = Functions::Analytic::Constant< 
Mesh::getMeshDimension(), typename Mesh::RealType > > class FastSweepingMethod { }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 1, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); - - public: - - typedef Meshes::Grid< 1, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); + + public: + + typedef Meshes::Grid< 1, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); - - public: - - typedef Meshes::Grid< 2, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - using typename BaseType::ArrayContainer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); + + public: + + typedef Meshes::Grid< 2, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + protected: const IndexType maxIterations; }; template< typename Real, - typename Device, - typename Index, - typename Anisotropy > + typename Device, + typename Index, + typename Anisotropy > class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy > - : public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > +: public tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > { - //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." 
); - - public: - - typedef Meshes::Grid< 3, Real, Device, Index > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Anisotropy AnisotropyType; - typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; - - using typename BaseType::InterfaceMapType; - using typename BaseType::MeshFunctionType; - using typename BaseType::InterfaceMapPointer; - using typename BaseType::MeshFunctionPointer; - - FastSweepingMethod(); - - const IndexType& getMaxIterations() const; - - void setMaxIterations( const IndexType& maxIterations ); - - void solve( const MeshPointer& mesh, - const AnisotropyPointer& anisotropy, - MeshFunctionPointer& u ); - - - protected: + //static_assert( std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." ); + + public: + + typedef Meshes::Grid< 3, Real, Device, Index > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef Anisotropy AnisotropyType; + typedef tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > BaseType; + using MeshPointer = Pointers::SharedPointer< MeshType >; + using AnisotropyPointer = Pointers::SharedPointer< AnisotropyType, DeviceType >; + + using typename BaseType::InterfaceMapType; + using typename BaseType::MeshFunctionType; + using typename BaseType::InterfaceMapPointer; + using typename BaseType::MeshFunctionPointer; + using typename BaseType::ArrayContainer; + + + FastSweepingMethod(); + + const IndexType& getMaxIterations() const; + + void setMaxIterations( const IndexType& maxIterations ); + + void solve( const MeshPointer& mesh, + const AnisotropyPointer& anisotropy, + MeshFunctionPointer& u ); + + + protected: const IndexType maxIterations; }; diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index b823fec03..07be36571 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -15,9 +15,12 @@ #include "tnlFastSweepingMethod.h" #include -#include +#include + + +#include #include #include @@ -80,16 +83,48 @@ solve( const MeshPointer& mesh, MeshFunctionType aux = *auxPtr; +//#ifdef HAVE_MPI + bool a = Communicators::MpiCommunicator::IsInitialized(); + if( a ) + printf("Je Init\n"); + else + printf("Neni Init\n"); +//#endif while( iteration < this->maxIterations ) { if( std::is_same< DeviceType, Devices::Host >::value ) { - int numThreadsPerBlock = 16; + int numThreadsPerBlock = -1; + + numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0)); + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + if( numThreadsPerBlock <= 16 ) + numThreadsPerBlock = 16; + else if(numThreadsPerBlock <= 32 ) + numThreadsPerBlock = 32; + else if(numThreadsPerBlock <= 64 ) + numThreadsPerBlock = 64; + else if(numThreadsPerBlock <= 128 ) + numThreadsPerBlock = 128; + else if(numThreadsPerBlock <= 256 ) + numThreadsPerBlock = 256; + else if(numThreadsPerBlock <= 512 ) + numThreadsPerBlock = 512; + else + numThreadsPerBlock = 1024; + //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock); + + if( numThreadsPerBlock == -1 ){ + printf("Fail in setting numThreadsPerBlock.\n"); + break; + } + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0); int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 
1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; /*Real **sArray = new Real*[numBlocksX*numBlocksY]; @@ -115,13 +150,29 @@ solve( const MeshPointer& mesh, } std::cout<template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + switch ( numThreadsPerBlock ){ + case 16: + this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 32: + this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 64: + this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 128: + this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 256: + this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + case 512: + this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + default: + this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + } + //Reduction for( int i = 0; i < BlockIterHost.getSize(); i++ ){ @@ -131,14 +182,14 @@ solve( const MeshPointer& mesh, } } numWhile++; - std::cout <<"numWhile = "<< numWhile < BlockIterPom1; - BlockIterPom1.setSize( numBlocksX * numBlocksY ); - BlockIterPom1.setValue( 0 );*/ + BlockIterPom1.setSize( numBlocksX * numBlocksY ); + BlockIterPom1.setValue( 0 );*/ /*int *BlockIterDevice; cudaMalloc((void**) &BlockIterDevice, ( numBlocksX * numBlocksY ) * sizeof( int ) );*/ int nBlocksNeigh = ( numBlocksX * numBlocksY )/1024 + ((( numBlocksX * numBlocksY )%1024 != 0) ? 
1:0); @@ -408,6 +459,7 @@ solve( const MeshPointer& mesh, } iteration++; } + //#endif aux.save("aux-final.tnl"); } @@ -527,7 +579,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /** FOR FIM METHOD */ - + if( BlockIterDevice[ blockIdx.y * gridDim.x + blockIdx.x ] ) { __syncthreads(); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 65aba5bf5..5af33cf29 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -64,9 +64,6 @@ solve( const MeshPointer& mesh, interfaceMapPtr->setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, auxPtr, interfaceMapPtr ); -#ifdef HAVE_CUDA - cudaDeviceSynchronize(); -#endif auxPtr->save( "aux-ini.tnl" ); typename MeshType::Cell cell( *mesh ); @@ -78,170 +75,259 @@ solve( const MeshPointer& mesh, { if( std::is_same< DeviceType, Devices::Host >::value ) { - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-1.tnl" ); + int numThreadsPerBlock = 64; - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "2 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-2.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "3 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-3.tnl" ); - for( cell.getCoordinates().z() = 0; - cell.getCoordinates().z() < mesh->getDimensions().z(); - cell.getCoordinates().z()++ ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "4 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-4.tnl" ); + int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 
1:0); + int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0); + int numBlocksZ = mesh->getDimensions().z() / numThreadsPerBlock + (mesh->getDimensions().z() % numThreadsPerBlock != 0 ? 1:0); + //std::cout << "numBlocksX = " << numBlocksX << std::endl; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "5 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-5.tnl" ); + /*Real **sArray = new Real*[numBlocksX*numBlocksY]; + for( int i = 0; i < numBlocksX * numBlocksY; i++ ) + sArray[ i ] = new Real [ (numThreadsPerBlock + 2)*(numThreadsPerBlock + 2)];*/ - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = 0; - cell.getCoordinates().y() < mesh->getDimensions().y(); - cell.getCoordinates().y()++ ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "6 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); - } - } - } - //aux.save( "aux-6.tnl" ); + ArrayContainer BlockIterHost; + BlockIterHost.setSize( numBlocksX * numBlocksY * numBlocksZ ); + BlockIterHost.setValue( 1 ); + int IsCalculationDone = 1; - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0 ; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = 0; - cell.getCoordinates().x() < mesh->getDimensions().x(); - cell.getCoordinates().x()++ ) - { - //std::cerr << "7 -> "; - cell.refresh(); - if( ! interfaceMap( cell ) ) - this->updateCell( aux, cell ); + MeshFunctionPointer helpFunc( mesh ); + MeshFunctionPointer helpFunc1( mesh ); + helpFunc1 = auxPtr; + auxPtr = helpFunc; + helpFunc = helpFunc1; + //std::cout<< "Size = " << BlockIterHost.getSize() << std::endl; + /*for( int k = numBlocksX-1; k >-1; k-- ){ + for( int l = 0; l < numBlocksY; l++ ){ + std::cout<< BlockIterHost[ l*numBlocksX + k ]; + } + std::cout<template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock/*, sArray*/ ); + + //Reduction + for( int i = 0; i < BlockIterHost.getSize(); i++ ){ + if( IsCalculationDone == 0 ){ + IsCalculationDone = IsCalculationDone || BlockIterHost[ i ]; + //break; } } - } - //aux.save( "aux-7.tnl" ); - - for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; - cell.getCoordinates().z() >= 0; - cell.getCoordinates().z()-- ) - { - for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; - cell.getCoordinates().y() >= 0; - cell.getCoordinates().y()-- ) - { - for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; - cell.getCoordinates().x() >= 0 ; - cell.getCoordinates().x()-- ) - { - //std::cerr << "8 -> "; - cell.refresh(); - if( ! 
interfaceMap( cell ) ) - this->updateCell( aux, cell ); + numWhile++; + std::cout <<"numWhile = "<< numWhile <-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; } + std::cout << std::endl; } + std::cout << std::endl;*/ + + this->getNeighbours( BlockIterHost, numBlocksX, numBlocksY, numBlocksZ ); + + /*for( int k = 0; k < numBlocksZ; k++ ){ + for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ){ + //std::cout << (*auxPtr)[ k * numBlocksX * numBlocksY + j * numBlocksX + i ] << " "; + std::cout << BlockIterHost[ k * numBlocksX * numBlocksY + j * numBlocksX + i ]; + } + std::cout << std::endl; + } + std::cout << std::endl; + }*/ + + /*for( int j = numBlocksY-1; j>-1; j-- ){ + for( int i = 0; i < numBlocksX; i++ ) + std::cout << "BlockIterHost = "<< j*numBlocksX + i<< " ," << BlockIterHost[ j * numBlocksX + i ]; + std::cout << std::endl; + } + std::cout << std::endl;*/ + + //std::cout<getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-1.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "2 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-2.tnl" ); + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "3 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-3.tnl" ); + + for( cell.getCoordinates().z() = 0; + cell.getCoordinates().z() < mesh->getDimensions().z(); + cell.getCoordinates().z()++ ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "4 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-4.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "5 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-5.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = 0; + cell.getCoordinates().y() < mesh->getDimensions().y(); + cell.getCoordinates().y()++ ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "6 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-6.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0 ; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = 0; + cell.getCoordinates().x() < mesh->getDimensions().x(); + cell.getCoordinates().x()++ ) + { + //std::cerr << "7 -> "; + cell.refresh(); + if( ! 
interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + } + //aux.save( "aux-7.tnl" ); + + for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; + cell.getCoordinates().z() >= 0; + cell.getCoordinates().z()-- ) + { + for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; + cell.getCoordinates().y() >= 0; + cell.getCoordinates().y()-- ) + { + for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; + cell.getCoordinates().x() >= 0 ; + cell.getCoordinates().x()-- ) + { + //std::cerr << "8 -> "; + cell.refresh(); + if( ! interfaceMap( cell ) ) + this->updateCell( aux, cell ); + } + } + }*/ } if( std::is_same< DeviceType, Devices::Cuda >::value ) { @@ -389,7 +475,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< { __syncthreads(); - __shared__ volatile bool changed[ (sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)]; + __shared__ volatile bool changed[ 8*8*8/*(sizeSArray - 2)*(sizeSArray - 2)*(sizeSArray - 2)*/]; changed[ currentIndex ] = false; if( thrj == 0 && thri == 0 && thrk == 0 ) @@ -402,6 +488,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thrj == 1 && thri == 1 && thrk == 1 ) { + //printf( "We are in the calculation. 
Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); hx = mesh.getSpaceSteps().x(); hy = mesh.getSpaceSteps().y(); hz = mesh.getSpaceSteps().z(); @@ -410,8 +497,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< dimZ = mesh.getDimensions().z(); BlockIterDevice[ blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ] = 0; } - __shared__ volatile Real sArray[sizeSArray][sizeSArray][sizeSArray]; - sArray[thrk+1][thrj+1][thri+1] = std::numeric_limits< Real >::max(); + __shared__ volatile Real sArray[ 10*10*10/*sizeSArray * sizeSArray * sizeSArray*/ ]; + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = std::numeric_limits< Real >::max(); //filling sArray edges int numOfBlockx; @@ -426,6 +513,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< numOfBlockx = gridDim.x; numOfBlocky = gridDim.y; numOfBlockz = gridDim.z; + __syncthreads(); if( numOfBlockx - 1 == blIdx ) xkolik = dimX - (blIdx)*blockDim.x+1; @@ -438,54 +526,55 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( thri == 0 ) { if( blIdx != 0 && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; + sArray[(thrk+1 )* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX -1 + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][0] = std::numeric_limits< Real >::max(); + sArray[(thrk+1)* sizeSArray * sizeSArray + (thrj+1)*sizeSArray + 0] = std::numeric_limits< Real >::max(); } if( thri == 1 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik && thrk+1 < zkolik ) - sArray[thrk+1][thrj+1][9] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + 
thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + xkolik ] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy *blockDim.y*dimX+ blIdx*blockDim.x + blockDim.x + thrj * dimX + thrk*dimX*dimY ]; else - sArray[thrk+1][thrj+1][9] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1)*sizeSArray + xkolik] = std::numeric_limits< Real >::max(); } if( thri == 2 ) { if( blIdy != 0 && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][0][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray +0*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][0][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + 0*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 3 ) { if( dimY > (blIdy+1) * blockDim.y && thrj+1 < xkolik && thrk+1 < zkolik ) - sArray[thrk+1][9][thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = aux[ blIdz*blockDim.z * dimX * dimY + (blIdy+1) * blockDim.y*dimX + blIdx*blockDim.x + thrj + thrk*dimX*dimY ]; else - sArray[thrk+1][9][thrj+1] = std::numeric_limits< Real >::max(); + sArray[ (thrk+1) * sizeSArray * sizeSArray + ykolik*sizeSArray + thrj+1] = std::numeric_limits< Real >::max(); } if( thri == 4 ) { if( blIdz != 0 && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[0][thrj+1][thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + thrj * dimX + thrk ]; + sArray[ 0 * sizeSArray * sizeSArray +(thrj+1 )* sizeSArray + thrk+1] = aux[ blIdz*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x - dimX * dimY + 
thrj * dimX + thrk ]; else - sArray[0][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[0 * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( thri == 5 ) { if( dimZ > (blIdz+1) * blockDim.z && thrj+1 < ykolik && thrk+1 < xkolik ) - sArray[9][thrj+1][thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = aux[ (blIdz+1)*blockDim.z * dimX * dimY + blIdy * blockDim.y*dimX + blIdx*blockDim.x + thrj * dimX + thrk ]; else - sArray[9][thrj+1][thrk+1] = std::numeric_limits< Real >::max(); + sArray[zkolik * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thrk+1] = std::numeric_limits< Real >::max(); } if( i < dimX && j < dimY && k < dimZ ) { - sArray[thrk+1][thrj+1][thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; + sArray[(thrk+1) * sizeSArray * sizeSArray + (thrj+1) *sizeSArray + thri+1] = aux[ k*dimX*dimY + j*dimX + i ]; } __syncthreads(); + while( changed[ 0 ] ) { __syncthreads(); @@ -493,11 +582,11 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< changed[ currentIndex ] = false; //calculation of update cell - if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && k < dimZ ) + if( i < dimX && j < dimY && k < dimZ ) { - if( ! interfaceMap[ k*dimX*dimY + j * mesh.getDimensions().x() + i ] ) + if( ! 
interfaceMap[ k*dimX*dimY + j * dimX + i ] ) { - changed[ currentIndex ] = ptr.updateCell( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); + changed[ currentIndex ] = ptr.updateCell3D< sizeSArray >( sArray, thri+1, thrj+1, thrk+1, hx,hy,hz); } } __syncthreads(); @@ -535,7 +624,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } } __syncthreads(); - if( currentIndex < 32 ) //POUZE IF JSOU SINCHRONNI NA JEDNOM WARPU + if( currentIndex < 32 ) { if( true ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 32 ]; if( currentIndex < 16 ) changed[ currentIndex ] = changed[ currentIndex ] || changed[ currentIndex + 16 ]; @@ -548,7 +637,8 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< /*if(thri == 0 && thrj ==0 && thrk ==0 && blIdx == 0 && blIdy == 0 && blIdz == 0) { - for(int m = 0; m < 8; m++){ + //for(int m = 0; m < 8; m++){ + int m = 4; for(int n = 0; n<8; n++){ for(int b=0; b<8; b++) printf(" %i ", changed[m*64 + n*8 + b]); @@ -556,16 +646,19 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } printf("\n \n"); } - }*/ + //}*/ + if( changed[ 0 ] && thri == 0 && thrj == 0 && thrk == 0 ) { - BlockIterDevice[ blIdz * numOfBlockx * numOfBlocky + blIdy * numOfBlockx + blIdx ] = 1; + //printf( "Setting block calculation. 
Block = %d.\n",blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x ); + BlockIterDevice[ blIdz * gridDim.x * gridDim.y + blIdy * gridDim.x + blIdx ] = 1; } __syncthreads(); } if( i < dimX && j < dimY && k < dimZ ) - helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[thrk+1][ thrj + 1 ][ thri + 1 ]; + helpFunc[ k*dimX*dimY + j * dimX + i ] = sArray[ (thrk+1) * sizeSArray * sizeSArray + (thrj+1) * sizeSArray + thri+1 ]; + } } #endif -- GitLab From 1163bf19405e8f2622a96540190d55dd284037a6 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 29 Nov 2018 13:51:34 +0100 Subject: [PATCH 19/20] Moving old code to Legacy. --- .../fast-sweeping-map/CMakeLists.txt | 0 .../Legacy/fast-sweeping-map}/MainBuildConfig.h | 0 .../fast-sweeping-map/fastSweepingMapConfig.h | 0 .../Legacy/fast-sweeping-map}/main.cpp | 0 .../Legacy/fast-sweeping-map}/main.cu | 0 .../Solvers => Legacy}/fast-sweeping-map/main.h | 0 .../fast-sweeping-map/tnlFastSweepingMap.h | 0 .../tnlFastSweepingMap2D_CUDA_v4_impl.h | 0 .../fast-sweeping-map/tnlFastSweepingMap2D_impl.h | 0 .../fast-sweeping-map/tnlFastSweepingMap_CUDA.h | 0 .../Solvers => Legacy}/fast-sweeping/CMakeLists.txt | 0 .../fast-sweeping}/MainBuildConfig.h | 0 .../fast-sweeping/fastSweepingConfig.h | 0 .../fast-sweeping}/main.cpp | 0 .../fast-sweeping}/main.cu | 0 .../Solvers => Legacy}/fast-sweeping/main.h | 0 .../fast-sweeping/tnlFastSweeping.h | 0 .../fast-sweeping/tnlFastSweeping2D_CUDA_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_impl.h | 0 .../fast-sweeping/tnlFastSweeping2D_openMP_impl.h | 0 .../fast-sweeping/tnlFastSweeping3D_CUDA_impl.h | 0 .../fast-sweeping/tnlFastSweeping3D_impl.h | 0 .../fast-sweeping/tnlFastSweepingSolver.h | 0 .../fast-sweeping/tnlFastSweeping_CUDA.h | 0 
.../hamilton-jacobi-parallel-map/CMakeLists.txt | 0 .../hamilton-jacobi-parallel-map}/MainBuildConfig.h | 0 .../hamilton-jacobi-parallel-map/gnuplot.txt | 0 .../hamilton-jacobi-parallel-map/main.cpp | 0 .../hamilton-jacobi-parallel-map/main.cu | 0 .../hamilton-jacobi-parallel-map/main.h | 0 .../hamilton-jacobi-parallel-map/mapa_png.png | Bin .../hamilton-jacobi-parallel-map/no-Makefile | 0 .../parallelMapConfig.h | 0 .../hamilton-jacobi-parallel-map/run | 0 .../hamilton-jacobi-parallel-map/tnl-err2eoc-2.py | 0 .../tnlParallelMapSolver.h | 0 .../tnlParallelMapSolver2D_impl.h | 0 .../hamilton-jacobi-parallel/CMakeLists.txt | 0 .../hamilton-jacobi-parallel}/MainBuildConfig.h | 0 .../hamilton-jacobi-parallel/main.cpp | 0 .../hamilton-jacobi-parallel/main.cu | 0 .../hamilton-jacobi-parallel/main.h | 0 .../hamilton-jacobi-parallel/no-Makefile | 0 .../parallelEikonalConfig.h | 0 .../Solvers => Legacy}/hamilton-jacobi-parallel/run | 0 .../hamilton-jacobi-parallel/tnl-err2eoc-2.py | 0 .../tnlParallelEikonalSolver.h | 0 .../tnlParallelEikonalSolver2D_impl.h | 0 .../tnlParallelEikonalSolver3D_impl.h | 0 .../Legacy}/narrow-band/CMakeLists.txt | 0 .../narrow-band}/MainBuildConfig.h | 0 .../fast-sweeping => Legacy/narrow-band}/main.cpp | 0 .../fast-sweeping => Legacy/narrow-band}/main.cu | 0 src/{Examples => TNL/Legacy}/narrow-band/main.h | 0 .../Legacy}/narrow-band/narrowBandConfig.h | 0 .../Legacy}/narrow-band/tnlNarrowBand.h | 0 .../narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h | 0 .../narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h | 0 .../Legacy}/narrow-band/tnlNarrowBand2D_impl.h | 0 .../Legacy}/narrow-band/tnlNarrowBand3D_CUDA_impl.h | 0 .../Legacy}/narrow-band/tnlNarrowBand3D_impl.h | 0 .../Legacy}/narrow-band/tnlNarrowBand_CUDA.h | 0 66 files changed, 0 insertions(+), 0 deletions(-) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/CMakeLists.txt (100%) rename src/{Examples/narrow-band => TNL/Legacy/fast-sweeping-map}/MainBuildConfig.h (100%) 
rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/fastSweepingMapConfig.h (100%) rename src/{Examples/narrow-band => TNL/Legacy/fast-sweeping-map}/main.cpp (100%) rename src/{Examples/narrow-band => TNL/Legacy/fast-sweeping-map}/main.cu (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/main.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/tnlFastSweepingMap.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/tnlFastSweepingMap2D_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping-map/tnlFastSweepingMap_CUDA.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/CMakeLists.txt (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map => Legacy/fast-sweeping}/MainBuildConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/fastSweepingConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map => Legacy/fast-sweeping}/main.cpp (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map => Legacy/fast-sweeping}/main.cu (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/main.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => 
Legacy}/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping2D_openMP_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping3D_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweepingSolver.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/fast-sweeping/tnlFastSweeping_CUDA.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/CMakeLists.txt (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping => Legacy/hamilton-jacobi-parallel-map}/MainBuildConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/gnuplot.txt (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/main.cpp (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/main.cu (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/main.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/mapa_png.png (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/no-Makefile (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/parallelMapConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/run (100%) rename 
src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/CMakeLists.txt (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map => Legacy/hamilton-jacobi-parallel}/MainBuildConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/main.cpp (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/main.cu (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/main.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/no-Makefile (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/parallelEikonalConfig.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/run (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/tnl-err2eoc-2.py (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers => Legacy}/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/CMakeLists.txt (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel => Legacy/narrow-band}/MainBuildConfig.h (100%) rename 
src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping => Legacy/narrow-band}/main.cpp (100%) rename src/TNL/{Experimental/Hamilton-Jacobi/Solvers/fast-sweeping => Legacy/narrow-band}/main.cu (100%) rename src/{Examples => TNL/Legacy}/narrow-band/main.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/narrowBandConfig.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand2D_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand3D_CUDA_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand3D_impl.h (100%) rename src/{Examples => TNL/Legacy}/narrow-band/tnlNarrowBand_CUDA.h (100%) diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/CMakeLists.txt rename to src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt diff --git a/src/Examples/narrow-band/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h similarity index 100% rename from src/Examples/narrow-band/MainBuildConfig.h rename to src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/fastSweepingMapConfig.h b/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/fastSweepingMapConfig.h rename to src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h diff --git a/src/Examples/narrow-band/main.cpp b/src/TNL/Legacy/fast-sweeping-map/main.cpp similarity index 100% rename from src/Examples/narrow-band/main.cpp rename to 
src/TNL/Legacy/fast-sweeping-map/main.cpp diff --git a/src/Examples/narrow-band/main.cu b/src/TNL/Legacy/fast-sweeping-map/main.cu similarity index 100% rename from src/Examples/narrow-band/main.cu rename to src/TNL/Legacy/fast-sweeping-map/main.cu diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h b/src/TNL/Legacy/fast-sweeping-map/main.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h rename to src/TNL/Legacy/fast-sweeping-map/main.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h rename to src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h rename to src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h rename to src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h rename to src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping/CMakeLists.txt similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/CMakeLists.txt rename to src/TNL/Legacy/fast-sweeping/CMakeLists.txt diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/MainBuildConfig.h rename to src/TNL/Legacy/fast-sweeping/MainBuildConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/fastSweepingConfig.h b/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/fastSweepingConfig.h rename to src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.cpp b/src/TNL/Legacy/fast-sweeping/main.cpp similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.cpp rename to src/TNL/Legacy/fast-sweeping/main.cpp diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.cu b/src/TNL/Legacy/fast-sweeping/main.cu similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.cu rename to src/TNL/Legacy/fast-sweeping/main.cu diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h b/src/TNL/Legacy/fast-sweeping/main.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h rename to src/TNL/Legacy/fast-sweeping/main.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h rename to 
src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h 
b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_openMP_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_openMP_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweepingSolver.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweepingSolver.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h rename to src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/CMakeLists.txt rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/MainBuildConfig.h rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/gnuplot.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/gnuplot.txt rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.cpp rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.cu rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h rename to 
src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/mapa_png.png b/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/mapa_png.png rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/no-Makefile rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/parallelMapConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/parallelMapConfig.h rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/run b/src/TNL/Legacy/hamilton-jacobi-parallel-map/run similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/run rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/run diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h similarity index 100% rename 
from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h rename to src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/CMakeLists.txt rename to src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/MainBuildConfig.h rename to src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.cpp rename to src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.cu rename to src/TNL/Legacy/hamilton-jacobi-parallel/main.cu diff --git 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel/main.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h rename to src/TNL/Legacy/hamilton-jacobi-parallel/main.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/no-Makefile rename to src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/parallelEikonalConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/parallelEikonalConfig.h rename to src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/run b/src/TNL/Legacy/hamilton-jacobi-parallel/run similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/run rename to src/TNL/Legacy/hamilton-jacobi-parallel/run diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnl-err2eoc-2.py rename to src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h rename to 
src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h rename to src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h rename to src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h diff --git a/src/Examples/narrow-band/CMakeLists.txt b/src/TNL/Legacy/narrow-band/CMakeLists.txt similarity index 100% rename from src/Examples/narrow-band/CMakeLists.txt rename to src/TNL/Legacy/narrow-band/CMakeLists.txt diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/MainBuildConfig.h b/src/TNL/Legacy/narrow-band/MainBuildConfig.h similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/MainBuildConfig.h rename to src/TNL/Legacy/narrow-band/MainBuildConfig.h diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.cpp b/src/TNL/Legacy/narrow-band/main.cpp similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.cpp rename to src/TNL/Legacy/narrow-band/main.cpp diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.cu b/src/TNL/Legacy/narrow-band/main.cu similarity index 100% rename from src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.cu rename to src/TNL/Legacy/narrow-band/main.cu diff --git 
a/src/Examples/narrow-band/main.h b/src/TNL/Legacy/narrow-band/main.h similarity index 100% rename from src/Examples/narrow-band/main.h rename to src/TNL/Legacy/narrow-band/main.h diff --git a/src/Examples/narrow-band/narrowBandConfig.h b/src/TNL/Legacy/narrow-band/narrowBandConfig.h similarity index 100% rename from src/Examples/narrow-band/narrowBandConfig.h rename to src/TNL/Legacy/narrow-band/narrowBandConfig.h diff --git a/src/Examples/narrow-band/tnlNarrowBand.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand.h diff --git a/src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h diff --git a/src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h diff --git a/src/Examples/narrow-band/tnlNarrowBand2D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand2D_impl.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h diff --git a/src/Examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h diff --git a/src/Examples/narrow-band/tnlNarrowBand3D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand3D_impl.h rename to 
src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h diff --git a/src/Examples/narrow-band/tnlNarrowBand_CUDA.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h similarity index 100% rename from src/Examples/narrow-band/tnlNarrowBand_CUDA.h rename to src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h -- GitLab From e7217db4a436509220b84019d4ee6b7d6c20a650 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 29 Nov 2018 14:02:03 +0100 Subject: [PATCH 20/20] Deleting legacy code. --- .../Legacy/fast-sweeping-map/CMakeLists.txt | 22 - .../fast-sweeping-map/MainBuildConfig.h | 64 - .../fast-sweeping-map/fastSweepingMapConfig.h | 39 - src/TNL/Legacy/fast-sweeping-map/main.cpp | 17 - src/TNL/Legacy/fast-sweeping-map/main.cu | 17 - src/TNL/Legacy/fast-sweeping-map/main.h | 88 - .../fast-sweeping-map/tnlFastSweepingMap.h | 188 -- .../tnlFastSweepingMap2D_CUDA_v4_impl.h | 1051 --------- .../tnlFastSweepingMap2D_impl.h | 823 ------- .../tnlFastSweepingMap_CUDA.h | 196 -- src/TNL/Legacy/fast-sweeping/CMakeLists.txt | 22 - .../Legacy/fast-sweeping/MainBuildConfig.h | 64 - .../Legacy/fast-sweeping/fastSweepingConfig.h | 38 - src/TNL/Legacy/fast-sweeping/main.cpp | 17 - src/TNL/Legacy/fast-sweeping/main.cu | 17 - src/TNL/Legacy/fast-sweeping/main.h | 88 - .../Legacy/fast-sweeping/tnlFastSweeping.h | 186 -- .../tnlFastSweeping2D_CUDA_impl.h | 522 ----- .../tnlFastSweeping2D_CUDA_v2_impl.h | 588 ----- .../tnlFastSweeping2D_CUDA_v3_impl.h | 920 -------- .../tnlFastSweeping2D_CUDA_v4_impl.h | 1003 --------- .../tnlFastSweeping2D_CUDA_v5_impl.h | 697 ------ .../fast-sweeping/tnlFastSweeping2D_impl.h | 927 -------- .../tnlFastSweeping2D_openMP_impl.h | 399 ---- .../tnlFastSweeping3D_CUDA_impl.h | 961 -------- .../fast-sweeping/tnlFastSweeping3D_impl.h | 307 --- .../fast-sweeping/tnlFastSweepingSolver.h | 36 - .../fast-sweeping/tnlFastSweeping_CUDA.h | 194 -- .../CMakeLists.txt | 23 - .../MainBuildConfig.h | 64 - .../hamilton-jacobi-parallel-map/gnuplot.txt | 32 - 
.../hamilton-jacobi-parallel-map/main.cpp | 17 - .../hamilton-jacobi-parallel-map/main.cu | 17 - .../hamilton-jacobi-parallel-map/main.h | 98 - .../hamilton-jacobi-parallel-map/mapa_png.png | Bin 24841 -> 0 bytes .../hamilton-jacobi-parallel-map/no-Makefile | 41 - .../parallelMapConfig.h | 47 - .../Legacy/hamilton-jacobi-parallel-map/run | 43 - .../tnl-err2eoc-2.py | 141 -- .../tnlParallelMapSolver.h | 217 -- .../tnlParallelMapSolver2D_impl.h | 1315 ----------- .../hamilton-jacobi-parallel/CMakeLists.txt | 23 - .../MainBuildConfig.h | 64 - .../Legacy/hamilton-jacobi-parallel/main.cpp | 17 - .../Legacy/hamilton-jacobi-parallel/main.cu | 17 - .../Legacy/hamilton-jacobi-parallel/main.h | 142 -- .../hamilton-jacobi-parallel/no-Makefile | 41 - .../parallelEikonalConfig.h | 46 - src/TNL/Legacy/hamilton-jacobi-parallel/run | 64 - .../hamilton-jacobi-parallel/tnl-err2eoc-2.py | 141 -- .../tnlParallelEikonalSolver.h | 366 ---- .../tnlParallelEikonalSolver2D_impl.h | 1928 ----------------- .../tnlParallelEikonalSolver3D_impl.h | 1706 --------------- src/TNL/Legacy/narrow-band/CMakeLists.txt | 22 - src/TNL/Legacy/narrow-band/MainBuildConfig.h | 64 - src/TNL/Legacy/narrow-band/main.cpp | 17 - src/TNL/Legacy/narrow-band/main.cu | 17 - src/TNL/Legacy/narrow-band/main.h | 88 - src/TNL/Legacy/narrow-band/narrowBandConfig.h | 40 - src/TNL/Legacy/narrow-band/tnlNarrowBand.h | 186 -- .../tnlNarrowBand2D_CUDA_v4_impl.h | 1317 ----------- .../tnlNarrowBand2D_CUDA_v5_impl.h | 1313 ----------- .../Legacy/narrow-band/tnlNarrowBand2D_impl.h | 927 -------- .../narrow-band/tnlNarrowBand3D_CUDA_impl.h | 961 -------- .../Legacy/narrow-band/tnlNarrowBand3D_impl.h | 307 --- .../Legacy/narrow-band/tnlNarrowBand_CUDA.h | 203 -- 66 files changed, 21563 deletions(-) delete mode 100644 src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt delete mode 100644 src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h delete mode 100644 
src/TNL/Legacy/fast-sweeping-map/main.cpp delete mode 100644 src/TNL/Legacy/fast-sweeping-map/main.cu delete mode 100644 src/TNL/Legacy/fast-sweeping-map/main.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h delete mode 100644 src/TNL/Legacy/fast-sweeping/CMakeLists.txt delete mode 100644 src/TNL/Legacy/fast-sweeping/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping/main.cpp delete mode 100644 src/TNL/Legacy/fast-sweeping/main.cu delete mode 100644 src/TNL/Legacy/fast-sweeping/main.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt 
delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel-map/run delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.cu delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel/run delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/CMakeLists.txt delete mode 100644 src/TNL/Legacy/narrow-band/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/narrow-band/main.cpp delete mode 100644 src/TNL/Legacy/narrow-band/main.cu delete mode 100644 src/TNL/Legacy/narrow-band/main.h delete mode 100644 
src/TNL/Legacy/narrow-band/narrowBandConfig.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h diff --git a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt deleted file mode 100644 index 3f9db0da0..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_fast_sweeping_map_SOURCES -# MainBuildConfig.h -# tnlFastSweepingMap2D_impl.h -# tnlFastSweepingMap.h -# fastSweepingMapConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(fast-sweeping-map main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(fast-sweeping-map main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (fast-sweeping-map tnl ) - - -INSTALL( TARGETS fast-sweeping-map - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_fast_sweeping_map_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping-map ) diff --git a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h deleted file mode 100644 index ed3d686eb..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - 
***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" < struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. 
- */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h b/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h deleted file mode 100644 index 9251deca8..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h +++ /dev/null @@ -1,39 +0,0 @@ -/*************************************************************************** - fastSweepingConfig.h - description - ------------------- - begin : Oct 15, 2015 - copyright : (C) 2015 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef FASTSWEEPINGCONFIG_H_ -#define FASTSWEEPINGCONFIG_H_ - -#include - -template< typename ConfigTag > -class fastSweepingMapConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); - config.addRequiredEntry < String > ( "map", "Gradient map for solver"); - } -}; - -#endif /* FASTSWEEPINGCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cpp b/src/TNL/Legacy/fast-sweeping-map/main.cpp deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cu b/src/TNL/Legacy/fast-sweeping-map/main.cu deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping-map/main.h b/src/TNL/Legacy/fast-sweeping-map/main.h deleted file mode 100644 index 6f23851c2..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -#include "tnlFastSweepingMap.h" - //for DEVICE versions: -//#include "tnlFastSweepingMap_CUDA.h" -#include "fastSweepingMapConfig.h" -#include - -#include -#include -#include -#include - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - fastSweepingMapConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlFastSweepingMap, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <, double, int> solver; -// if(!solver.init(parameters)) -// { -// cerr << "Solver failed to initialize." 
< -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_OPENMP -#include -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweepingMap -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlFastSweepingMap(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - int something_changed; - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data,map; - - RealType h; - - 
tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweepingMap(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlFastSweepingMap2D_impl.h" - //for parallel version use this one instead: -// #include "tnlFastSweepingMap2D_openMP_impl.h" - -// #include "tnlFastSweepingMap3D_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h deleted file mode 100644 index d02b8d6c5..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1051 +0,0 @@ 
-/*************************************************************************** - tnlFastSweepingMap2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweepingMap.h" - -#define MAP_SOLVER_MAX_VALUE 3 - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweepingMap< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap() -:dofVector(Mesh) -{ -} - 
- -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <("map"); - if(! this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(map_cuda), this->map.getSize()*sizeof(double)); - cudaMemcpy(map_cuda, this->map.getData(), this->map.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(changed), sizeof(int)); - //counter == 0 --> setting changed to 0 - cudaMemcpy(changed, &counter, sizeof(int), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = 
Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(4,1); - - int run = 1; - int zero = 0; - int cntr = 0; - - while(run != 0) - { - cudaMemcpy(this->changed, &zero, sizeof(int), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - runCUDA<<>>(this->cudaSolver,0,0, this->changed); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(&run, this->changed,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cntr++; - cout << "Finished set of sweeps #" << cntr << " " << run <dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index* something_changed) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - - if(map_cuda[Entity.getIndex()] != 0.0) - { - tnlNeighborGridEntityGetter,2> 
neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real im = abs(1.0/map_cuda[Entity.getIndex()]); - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= im*h) - tmp = fabsMin(a,b) + sign(value)*im*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - - if(abs(value)-abs(tmp) > 0.0) - atomicMax(something_changed,1); - } - else - { - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), MAP_SOLVER_MAX_VALUE); - } - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = 
INT_MAX*sign(cudaDofVector[gid]); - - if(abs(cudaDofVector[gid]) < 1.01*h) - { - cudaDofVector2[gid] = cudaDofVector[gid]; - if(map_cuda[gid] != 0.0) - cudaDofVector2[gid] /=map_cuda[gid]; - } - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// 
if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed) -{ - - __shared__ int something_changed; - if(threadIdx.x+threadIdx.y == 0) - something_changed = 0; - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - __syncthreads(); - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,&something_changed); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,&something_changed); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - 
solver->updateValue(gx,gy,&something_changed); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,&something_changed); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - - - if(threadIdx.x+threadIdx.y == 0) - atomicMax(changed, something_changed); - - - - -} - - -__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename 
MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - 
cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - 
cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, 
TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - 
(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 
1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > 
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - 
be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > 
Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - 
cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ 
diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h deleted file mode 100644 index 4bd9e17c5..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h +++ /dev/null @@ -1,823 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - - -#define MAP_SOLVER_MAX_VALUE 3 - - -#include "tnlFastSweepingMap.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweepingMap< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const 
Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <("map"); - if(! this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" < -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - - if(abs(dofVector[i]) < 1.01*h) - { - dofVector2[i] = dofVector[i]; - if(map[i] != 0.0) - dofVector2[i] /= map[i]; - } - } - -// for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) -// { -// for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) -// { -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// neighborEntities.refresh(Mesh,Entity.getIndex()); -// -// if(dofVector[this->Entity.getIndex()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// 
if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } -// } - cout << "a" < -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - int cntr = 0; - while(something_changed != 0) - { - something_changed = 0; - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i 
> -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - cntr++; - cout << "Finished set of sweeps #" << cntr << " " << something_changed < -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - if(map[Entity.getIndex()] != 0.0) - { - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real im = abs(1.0/map[Entity.getIndex()]); - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(fabs(a-b) >= im*h) - tmp = fabsMin(a,b) + sign(value)*im*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) ); - - if(abs(value)-abs(tmp) > 0.0) - something_changed = 1; - - 
dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - - } - else - { - dofVector2[Entity.getIndex()] = MAP_SOLVER_MAX_VALUE; - } -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - 
this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - 
dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 
2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - 
dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 
>()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 
>()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template 
getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); 
- - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - 
typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h deleted file mode 100644 index a23057e78..000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h +++ /dev/null @@ -1,196 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweepingMap -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweepingMap(); - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j, Index* something_changed); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - double* map_cuda; - int counter; - int* changed; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index 
i, Index j); - __device__ void setupSquare0010(Index i, Index j); - __device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data, map; - - - RealType h; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template -__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed); -//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, 
TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - -/*various implementtions.... choose one*/ -//#include "tnlFastSweepingMap2D_CUDA_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v2_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v3_impl.h" -#include "tnlFastSweepingMap2D_CUDA_v4_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v5_impl.h" - - -// #include "tnlFastSweepingMap3D_CUDA_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping/CMakeLists.txt deleted file mode 100644 index 1a23d646a..000000000 --- a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_fast_sweeping_SOURCES -# MainBuildConfig.h -# tnlFastSweeping2D_impl.h -# tnlFastSweeping.h -# fastSweepingConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(fast-sweeping main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(fast-sweeping main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (fast-sweeping tnl ) - - -INSTALL( TARGETS fast-sweeping - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_fast_sweeping_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping ) diff --git a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h deleted file mode 100644 index ed3d686eb..000000000 --- a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : 
tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" < struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. 
- */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h b/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h deleted file mode 100644 index 3df2c1e88..000000000 --- a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h +++ /dev/null @@ -1,38 +0,0 @@ -/*************************************************************************** - fastSweepingConfig.h - description - ------------------- - begin : Oct 15, 2015 - copyright : (C) 2015 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef FASTSWEEPINGCONFIG_H_ -#define FASTSWEEPINGCONFIG_H_ - -#include - -template< typename ConfigTag > -class fastSweepingConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); - } -}; - -#endif /* FASTSWEEPINGCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/main.cpp b/src/TNL/Legacy/fast-sweeping/main.cpp deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/fast-sweeping/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping/main.cu b/src/TNL/Legacy/fast-sweeping/main.cu deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/fast-sweeping/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping/main.h b/src/TNL/Legacy/fast-sweeping/main.h deleted file mode 100644 index e5ac15fed..000000000 --- a/src/TNL/Legacy/fast-sweeping/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -#include "tnlFastSweeping.h" - //for DEVICE versions: -//#include "tnlFastSweeping_CUDA.h" -#include "fastSweepingConfig.h" -#include - -#include -#include -#include -#include - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - fastSweepingConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlFastSweeping, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." 
< -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_OPENMP -#include -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweeping -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlFastSweeping(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 2, 
tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweeping(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlFastSweeping2D_impl.h" - //for parallel version use this one instead: -// #include "tnlFastSweeping2D_openMP_impl.h" - -#include "tnlFastSweeping3D_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h deleted file mode 100644 index bc1da169c..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h +++ /dev/null @@ -1,522 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_impl.h - description - 
------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// 
-///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(32, 32); - dim3 numBlocks(n/32 + 1 ,n/32 +1); - - for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<>>(this->cudaSolver,4,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 0; i < 2*n ; i++) - { - runCUDA<<>>(this->cudaSolver,1,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 0; i < 2*n ; i++) - { - runCUDA<<>>(this->cudaSolver,2,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<>>(this->cudaSolver,3,i); - cudaDeviceSynchronize(); - } - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == 
Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - 
cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else 
if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy 
== Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - Real fy = abs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - return; - int total = solver->Mesh.getDimensions().x(); - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - int id1 = gx+gy; - int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - /*---------------------------------------------------------------------------------------------------------------------------*/ - if(sweep == 1) -// for(int i = 0; i < 2*total - 1; i++) - { - if(id1 == i) - { - solver->updateValue(gx,gy); - return; - } - - } - 
/*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 2) -// for(int i = 0; i < 2*total - 1; i++) - { - if(id2 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 3) -// for(int i = 2*total - 2; i > -1; i--) - { - if(id1 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 4) -// for(int i = 2*total - 2; i > -1; i--) - { - if(id2 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h deleted file mode 100644 index 3ad5b7944..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h +++ /dev/null @@ -1,588 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v2_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * 
This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// 
-///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(27, 27); - dim3 numBlocks(1 ,1); - -// for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<>>(this->cudaSolver,4,0); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -//// for(int i = 0; i < 2*n ; i++) -// { -// runCUDA<<>>(this->cudaSolver,1,0); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -//// for(int i = 0; i < 2*n ; i++) -// { -// runCUDA<<>>(this->cudaSolver,2,0); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -//// for(int i = 2*n - 1; i > -1; i--) -// { -// runCUDA<<>>(this->cudaSolver,3,0); -// cudaDeviceSynchronize(); -// } -// -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template 
getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - 
cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 
0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - 
if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - - Real tmpMin = Min(fx,abs(y)); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -{ - - //int gx = threadIdx.x; - //int gy = threadIdx.y; - int id1,id2; - int nx = solver->Mesh.getDimensions().x()+ threadIdx.x; - int ny = solver->Mesh.getDimensions().y()+ threadIdx.y; - - int blockCount = solver->Mesh.getDimensions().x()/blockDim.x + 1; - - for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y) - { - for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - 
__syncthreads(); - } - - } - //gx+=blockDim.x; - //__syncthreads(); - } - //gx = threadIdx.x; - //gy+=blockDim.y; - //__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -// gx = blockDim.x*(blockCount-1) + threadIdx.x; -// gy = threadIdx.y; - for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y) - { - for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx-=blockDim.x; - //__syncthreads(); - } - //gx = blockDim.x*(blockCount-1) + threadIdx.x; - //gy+=blockDim.y; - //__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -// gx = blockDim.x*(blockCount-1) + threadIdx.x; -// gy = blockDim.x*(blockCount-1) + threadIdx.y; - for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y) - { - for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx-=blockDim.x; - //__syncthreads(); - } - //gx = blockDim.x*(blockCount-1) + threadIdx.x; - //gy-=blockDim.y; - //__syncthreads(); - } - 
/*---------------------------------------------------------------------------------------------------------------------------*/ - //gx = threadIdx.x; - //gy = blockDim.x*(blockCount-1) +threadIdx.y; - for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y) - { - for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx+=blockDim.x; - //__syncthreads(); - } - //gx = threadIdx.x; - //gy-=blockDim.y; - ///__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h deleted file mode 100644 index ff36d3f8e..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h +++ /dev/null @@ -1,920 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v3_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - 
-/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - - - -__device__ double atomicSet(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong(val )); - } while (assumed != old); - return __longlong_as_double(old); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// 
-///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 +1 ,n/16 +1); - int m =n/16 +1; - - for(int i = 0; i < 2*m -1; i++) - { - runCUDA<15><<>>(this->cudaSolver,1,i); - //cudaDeviceSynchronize(); - } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<2><<>>(this->cudaSolver,2,i); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<4><<>>(this->cudaSolver,4,i); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<8><<>>(this->cudaSolver,8,i); -// cudaDeviceSynchronize(); -// } - - - - -// for(int i = 0; i < (2*m -1)/4 -1; i++) -// { -// runCUDA<15><<>>(this->cudaSolver,15,i);//all -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1)/4 -1; i < (2*m -1)/2 -1; i++) -// { -// runCUDA<5><<>>(this->cudaSolver,5,i); //two -// cudaDeviceSynchronize(); -// runCUDA<10><<>>(this->cudaSolver,10,i); //two -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1)/2 -1; i < (2*m -1)/2 +1; i++) -// { -// runCUDA<1><<>>(this->cudaSolver,1,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<2><<>>(this->cudaSolver,2,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<4><<>>(this->cudaSolver,4,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<8><<>>(this->cudaSolver,8,i); //separate -// cudaDeviceSynchronize(); -// } -// 
for(int i = (2*m -1)/2 +1; i < (2*m -1/4)*3 +1; i++) -// { -// runCUDA<5><<>>(this->cudaSolver,5,i); //two -// cudaDeviceSynchronize(); -// runCUDA<10><<>>(this->cudaSolver,10,i); //two -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1/4)*3 +1; i < 2*m -1; i++) -// { -// runCUDA<15><<>>(this->cudaSolver,15,i);//all -// cudaDeviceSynchronize(); -// } -cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - atomicSet(&cudaDofVector[index],fabsMin(value, 
tmp)); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - 
cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - 
cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename 
Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ -// Real fx = abs(x); -// -// Real tmpMin = Min(fx,abs(y)); - - if(abs(y) > abs(x)) - return x; - else - return y; - - -} - - -template<> -__global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -{ - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -} - template<> - __global__ void runCUDA<2>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - - } - } /*---------------------------------------------------------------------------------------------------------------------------*/ - template<> - __global__ void runCUDA<4>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + 
blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - } - - template<> - __global__ void runCUDA<8>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - -} - - - template<> - __global__ void runCUDA<5>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - else if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + 
blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - } - - - template<> - __global__ void runCUDA<10>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - - else if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - - } - - - - template<> - __global__ void runCUDA<15>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - 
__syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - } - - - - - - - - - - - - - - - - - - 
- - - - - - - - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - - - -//__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -//{ -// -// if(sweep==1 && blockIdx.x+blockIdx.y == k) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id1 = threadIdx.x+threadIdx.y; -// -// for(int l = 0; l < 2*blockDim.x - 1; l++) -// { -// if(id1 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==2 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == k) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; -// -// for(int l = 0; l < 2*blockDim.x - 1; l++) -// { -// if(id2 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==4 && blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id1 = threadIdx.x+threadIdx.y; -// -// for(int l = 
2*blockDim.x - 2; l > -1; l--) -// { -// if(id1 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// return; -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==8 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; -// -// for(int l = 2*blockDim.x - 2; l > -1; l--) -// { -// if(id2 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// return; -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// -// -// -//} - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h deleted file mode 100644 index e0a9697c2..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1003 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either 
version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(4,1); - - - runCUDA<<>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - 
//data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -// cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = 
blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); -// -// if(abs(cudaDofVector[gid]) < 1.01*h) -// cudaDofVector2[gid] = cudaDofVector[gid]; - - - - - - if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() && !exactInput) - { - if(cudaDofVector[Entity.getIndex()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - 
if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - if(exactInput) - { - if(abs(cudaDofVector[gid]) < 1.5*h) - cudaDofVector2[gid] = cudaDofVector[gid]; - } - - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + 
blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); -// Entity.setCoordinates(CoordinatesType(i,j)); -// Entity.refresh(); -// tnlNeighborGridEntityGetter,2> neighborEntities(Entity); -// cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename 
MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); -// Entity.setCoordinates(CoordinatesType(i,j)); -// Entity.refresh(); -// tnlNeighborGridEntityGetter,2> neighborEntities(Entity); -// cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - 
cudaDofVector2[Entity.getIndex()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; 
//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=INT_MAX; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 
2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - 
cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; 
//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); 
- cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage 
> Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template 
getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 
>()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - 
Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b = 1.0; - c = -be; - s = h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template 
getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h deleted file mode 100644 index 1591bb613..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h +++ /dev/null @@ -1,697 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v5_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// 
-///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 512); - dim3 numBlocks(4,1); - - - runCUDA<<>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - 
cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, double** sharedMem, int k3) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = sharedMem[k3+1][threadIdx.y]; - Real a,b, tmp; - - if( i == 0 ) - a = sharedMem[k3][threadIdx.y]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = sharedMem[k3+2][threadIdx.y]; - else - { - a = fabsMin( sharedMem[k3][threadIdx.y], - sharedMem[k3+2][threadIdx.y] ); - } - - if( j == 0 ) - b = sharedMem[k3][threadIdx.y+1]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = sharedMem[k3+2][threadIdx.y-1]; - else - { - b = fabsMin( sharedMem[k3][threadIdx.y+1], - sharedMem[k3+2][threadIdx.y-1] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -// sharedMem[k3+1][threadIdx.y] = this->fabsMin(value, tmp); -// atomicFabsMin(&(cudaDofVector[index]), tmp); - cudaDofVector[index] = tmp; - this->fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - 
if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else 
if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - 
cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - extern __shared__ double u[]; - double* 
sharedMem[5]; - sharedMem[0] = u; - sharedMem[1] = &(u[blockDim.y+1]); - sharedMem[2] = &(sharedMem[1][blockDim.y+1]); - sharedMem[3] = sharedMem[1]; - sharedMem[4] = sharedMem[2]; - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - - if(blockIdx.x==0) - { - if(threadIdx.y==0) - sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,0))]; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - int k3=k%3; - - if(threadIdx.y==0) - { - if(gx==n-1) - sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy+blockDim.y))]; - else - sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; - } -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1]; - - if(gycudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; - - solver->updateValue(gx,gy,sharedMem,k3); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } -// else if(blockIdx.x==1) -// { -// gx=n-1; -// gy=threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,0))]; -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// if(threadIdx.y < k+1 && gy < n) -// { -// int k3=k%3; -// -// if(threadIdx.y==0) -// if(gx==0) -// 
sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy+blockDim.y))]; -// else -// sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx-1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1]; -// -// if(gycudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx--; -// if(gx==-1) -// { -// gx=n-1; -// gy+=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } -// else if(blockIdx.x==2) -// { -// gx=0; -// gy=n-blockDim.y-1+threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,n-1))]; -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// if(blockDim.y-threadIdx.y < k+1 && gy > -1) -// { -// int k3=k%3; -// -// if(threadIdx.y==blockDim.y-1) -// if(gx==n-1) -// sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy-blockDim.y))]; -// else -// sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1]; -// -// if(gycudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx++; -// if(gx==n) -// { -// gx=0; -// gy-=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } -// else if(blockIdx.x==3) -// { -// gx=n-1; -// gy=n-blockDim.y-1+threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,n-1))]; -// -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// 
if(blockDim.y-threadIdx.y < k+1 && gy > -1) -// { -// int k3=k%3; -// -// if(threadIdx.y==blockDim.y-1) -// if(gx==n-1) -// sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy-blockDim.y))]; -// else -// sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1]; -// -// if(gycudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx--; -// if(gx==-1) -// { -// gx=n-1; -// gy-=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h deleted file mode 100644 index c4ce8fe6b..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h +++ /dev/null @@ -1,927 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; 
either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" < -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) - { - for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) - { - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - neighborEntities.refresh(Mesh,Entity.getIndex()); - - if(dofVector[this->Entity.getIndex()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - 
setupSquare0100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - } - cout << "a" < 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// i = 0; -// j = Mesh.getDimensions().y() -1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - //data.setLike(dofVector2.getData()); - //data=dofVector2.getData(); - //cout << data.getType() < -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - 
- for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - -// data.setLike(dofVector2.getData()); -// data = dofVector2.getData(); -// cout << data.getType() < -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - -// if(dofVector2[Entity.getIndex()] > 1.0) -// cout << value << " " << tmp << " " << dofVector2[Entity.getIndex()] < -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 
1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index 
> :: setupSquare1101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; 
- s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void 
tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - 
(dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template 
getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template 
getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - 
dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; 
- c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename 
Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h deleted file mode 100644 index 54bbe931e..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h +++ /dev/null @@ -1,399 +0,0 @@ -/*************************************************************************** - tnlFastSweeping_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - -#ifdef HAVE_OPENMP -// gridLock = (omp_lock_t*) malloc(sizeof(omp_lock_t)*Mesh.getDimensions().x()*Mesh.getDimensions().y()); -// -// for(int i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) -// omp_init_lock(&gridLock[i]); -#endif - - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - Real tmp = 0.0; - - if(!exactInput) - { - for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) - dofVector[i]=0.5*h*sign(dofVector[i]); - } - - - for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) - { - for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) - { - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - - - - for(int i = 1; i < Mesh.getDimensions().x()-1; i++) - { - Index j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int i = 1; i < Mesh.getDimensions().x()-1; i++) - { - Index j = Mesh.getDimensions().y() - 1; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else 
if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int j = 1; j < Mesh.getDimensions().y()-1; j++) - { - Index i = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int j = 1; j < Mesh.getDimensions().y()-1; j++) - { - Index i = Mesh.getDimensions().x() - 1; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - - Index i = Mesh.getDimensions().x() - 1; - Index j = Mesh.getDimensions().y() - 1; - - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - i = 0; - j = Mesh.getDimensions().y() -1; - tmp = 
sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - dofVector.save("u-00000.tnl"); - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - DofVectorType d2,d3,d4; - d2.setLike(dofVector); - d2=dofVector; - d3.setLike(dofVector); - d3=dofVector; - d4.setLike(dofVector); - d4=dofVector; - - -#ifdef HAVE_OPENMP -#pragma omp parallel sections num_threads(4) - { - { -#endif - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,&dofVector); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,&d2); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j, &d3); - } - } - 
-/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j, &d4); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } - } -#endif - - -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - int index = Mesh.getCellIndex(CoordinatesType(i,j)); - dofVector[index] = fabsMin(dofVector[index], d2[index]); - dofVector[index] = fabsMin(dofVector[index], d3[index]); - dofVector[index] = fabsMin(dofVector[index], d4[index]); - } - } - - dofVector.save("u-00001.tnl"); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, DofVectorType* grid) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = (*grid)[index]; - Real a,b, tmp; - - if( i == 0 ) - a = (*grid)[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = (*grid)[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( (*grid)[Mesh.template getCellNextToCell<-1,0>(index)], - (*grid)[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = (*grid)[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = (*grid)[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( (*grid)[Mesh.template getCellNextToCell<0,-1>(index)], - (*grid)[Mesh.template 
getCellNextToCell<0,1>(index)] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -#ifdef HAVE_OPENMP -// omp_set_lock(&gridLock[index]); -#endif - (*grid)[index] = fabsMin(value, tmp); -#ifdef HAVE_OPENMP -// omp_unset_lock(&gridLock[index]); -#endif -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h deleted file mode 100644 index 6a5195cfe..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h +++ /dev/null @@ -1,961 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING3D_IMPL_H_ -#define TNLFASTSWEEPING3D_IMPL_H_ - -#include "tnlFastSweeping.h" - -//__device__ -//double fabsMin( double x, double y) -//{ -// double fx = abs(x); -// -// if(Min(fx,abs(y)) == fx) -// return x; -// else -// return y; -//} -// -//__device__ -//double atomicFabsMin(double* address, double val) -//{ -// unsigned long long int* address_as_ull = -// (unsigned long long int*)address; -// unsigned long long int old = *address_as_ull, assumed; -// do { -// assumed = old; -// old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); -// } while (assumed != old); -// return __longlong_as_double(old); -//} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(8, 8,8); - dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(8,1); - - - runCUDA<<>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef 
HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()] ); - } - - if( k == 0 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()]; - else - { - c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - 
atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - int gid = Entity.getIndex(); - - if(abs(cudaDofVector[gid]) < 1.0*h) - cudaDofVector2[gid] = 0.5*h;//cudaDofVector[gid]; - else - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - - if(blockIdx.x==0) - { - for(int gz = 0; gz < n;gz++) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==2) - { - - for(int gz = 0; gz < n;gz++) - { - gx=0; - 
gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==3) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - - else if(blockIdx.x==4) - { - for(int gz = n-1; gz > -1;gz--) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==5) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==6) - { - - for(int gz = n-1; gz > -1;gz--) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==7) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = 
threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gz = blockDim.z*blockIdx.z + threadIdx.z; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) - { - solver->initGrid(gx,gy,gz); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// 
-// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template 
getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template 
getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template 
getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: 
setupSquare1001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// 
cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template 
getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -//} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h deleted file mode 100644 index e22de0ab8..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h +++ /dev/null @@ -1,307 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING3D_IMPL_H_ -#define TNLFASTSWEEPING3D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; -// cout << "bla "< -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) - { - - if (abs(dofVector[i]) < 1.8*h) - dofVector2[i]=dofVector[i]; - else - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - 
-/*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - - - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - dofVector2.save("u-00001.tnl"); - - cout << "bla 3"< -void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - this->Entity.setCoordinates(CoordinatesType(i,j,k)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - Real value = dofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template 
getEntityIndex< 1, 0, 0>()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()] ); - } - - if( k == 0 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()]; - else - { - c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h deleted file mode 100644 index fc9eb5459..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * File: 
tnlFastSweepingSolver.h - * Author: oberhuber - * - * Created on July 12, 2016, 6:04 PM - */ - -#pragma once - -#include -#include - -template< typename Mesh, - typename Communicator, - typename Anisotropy = tnlConstanstFunction< Mesh > > -class tnlFastSweepingSolver : public tnlPDEProblem< Mesh, - Communicator, - typename Mesh::RealType, - typename Mesh::DeviceType, - typename Mesh::IndexType > -{ - public: - - typedef typename DifferentialOperator::RealType RealType; - typedef typename Mesh::DeviceType DeviceType; - typedef typename DifferentialOperator::IndexType IndexType; - - typedef tnlMeshFunction< Mesh > MeshFunctionType; - typedef tnlPDEProblem< Mesh, TimeDependentProblem, RealType, DeviceType, IndexType > BaseType; - - using typename BaseType::MeshType; - using typename BaseType::DofVectorType; - using typename BaseType::MeshDependentDataType; -}; - - diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h deleted file mode 100644 index f531da431..000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h +++ /dev/null @@ -1,194 +0,0 @@ -/*************************************************************************** - tnlFastSweeping_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweeping -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweeping(); - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index i, Index j); - __device__ void setupSquare0010(Index i, Index j); - 
__device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data; - - - RealType h; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlFastSweeping< 
tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - -/*various implementtions.... choose one*/ -//#include "tnlFastSweeping2D_CUDA_impl.h" -//#include "tnlFastSweeping2D_CUDA_v2_impl.h" -//#include "tnlFastSweeping2D_CUDA_v3_impl.h" -#include "tnlFastSweeping2D_CUDA_v4_impl.h" -//#include "tnlFastSweeping2D_CUDA_v5_impl.h" - - -#include "tnlFastSweeping3D_CUDA_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt deleted file mode 100644 index 48382df82..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set( tnl_hamilton_jacobi_parallel_map_SOURCES -# MainBuildConfig.h -# tnlParallelMapSolver2D_impl.h -# tnlParallelMapSolver.h -# parallelMapConfig.h -# main.cu - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (hamilton-jacobi-parallel-map tnl ) - - -INSTALL( TARGETS hamilton-jacobi-parallel-map - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_map_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel-map ) diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h deleted file mode 100644 index ed3d686eb..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : 
Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" < struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. 
- */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt deleted file mode 100644 index d4ae61983..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt +++ /dev/null @@ -1,32 +0,0 @@ -tomas@tomas-linux:~/Desktop/VU_CPU_MAPA/work_dir$ gnuplot - - G N U P L O T - Version 4.6 patchlevel 4 last modified 2013-10-02 - Build System: Linux x86_64 - - Copyright (C) 1986-1993, 1998, 2004, 2007-2013 - Thomas Williams, Colin Kelley and many others - - gnuplot home: http://www.gnuplot.info - faq, bugs, etc: type "help FAQ" - immediate help: type "help" (plot window: hit 'h') - -Terminal type set to 'wxt' -gnuplot> set cntrparam levels 15 -gnuplot> set cntrparam bspline -gnuplot> set contour -gnuplot> splot 'u-00001.gplt' - -gnuplot> unset surface -gnuplot> splot 'u-00001.gplt' - -gnuplot> set table "test.gplt" -gnuplot> splot 'u-00001.gplt' -gnuplot> unset table - -gnuplot> set table "test2.gplt" -gnuplot> plot 'test.gplt' index 10 -gnuplot> unset table - -gnuplot> plot 'test2.gplt' - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp deleted file mode 100644 index b13498e17..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ 
-/*************************************************************************** - main.cpp - description - ------------------- - begin : Jul 8 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu deleted file mode 100644 index 710197671..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cu - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h deleted file mode 100644 index fff21c77e..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h +++ /dev/null @@ -1,98 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "tnlParallelMapSolver.h" -#include "parallelMapConfig.h" -#include "MainBuildConfig.h" -#include -#include -#include -#include -#include -#include - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - parallelMapConfig< BuildConfig >::configSetup( configDescription ); - - if( ! 
parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - - tnlDeviceEnum device; - device = TNL::Devices::HostDevice; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - - typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost; -/*#ifdef HAVE_CUDA - typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; -#endif -#ifndef HAVE_CUDA*/ - typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; -/*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <, double, int > SchemeType; - - tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." 
<{f0jf*t8*mNiD+mi3vZ1Z*H3GePjng4w+s!6ZeJp1jfIQdPfk=H!u?m04hj%1wk zxikMC-cA+qo~AMH@Z--LB5q$f6Ru+a`R~7d$DeoCecyfGbrN`{itJ-M@eSY0>p{ zvDJUSUca07{mq*#vU=KMr3XS86pk{@S}6nRgvL>GgM9ZojAGx#_OkT*gT%CpYccyLa!- zoik5AHIeGQ{Blq2?{DV&KJqTUxZ+}liPYJP^IvH(E!L>|{q61lAN~JNq!?||`ObMK z*dg#r-gfJ`=l1U18=|G!mBcP+A=Uft&6_2cU*?!)8_l%)`_lja)A)Z&uV&ePW#8R( zfTJlv!tHX2icshA$7Q=^kAEzSzT*CxVZL480-f1q-u(Gz6?mqgb zuHHc6%ImMMN)8=OGL-9Qu73P+ht6s9ygNUxZQu9wYn5bMV~F3>>95R~vbWyaxpQZR z$*IpZD?=`2ZGF3KMgKIX_1AB|{kC=M)}u#{9z1w3Yte;ijv_}>j81;8(GaT4y^je;zS98SwGKmGi(k7LoV42uIFyB0-gh}_EMZc^x2 zr17&RPM{?~MC`5!2UI-!dU|s5^V^*ketv%bJLjEj1W@OCjJDT*gXy@H+UoK`X z(BO)dSs0)(*Khlb15b;79_<#7j*b@BkK0r7@=|pE-ml+o=l{R8{%_Reyz6sWjuqZX zG1Ar7|Nkc4{`aTp`|s#;d>082F_r4&YfrAPx1WFB{C8Q`B9BQ^yq1cIi(B?B&Wu`o z@yyw?fA8M^D|>p=`m6&PBD*FZ(CG5>@nLB``1bAFI(K*eLs?tn_Se~paJec?{P^+X zGug#vW@bh+dlWdV=K2LrS-5cF`}+UCjzoNZgTZp|^vHj%R4 zyLE5>=B7m&GRJG}<{y7NbJnb^H{uF1eCk4+jsjw0VyB*dI&k1X{Qs}<@w-YoQzMsO zzIi};Zr3_hgRVt7r!SfO1xd~S|L3`{TJZeCwybmg%nc1UZrL)$Z+U8Do`<*pH({IF zUnY}J?)m%e_WytH|G)cKq0zM|YAwq}HnURQMnM)ydHMdMNwa6qzL_&E^`>28HJ6BM z?9QU2&p)>wPV9SpCaw7F^iye@ZO%V0+IhTR-Z}H}_xk_ej~_pto|e|u-hTV`+v{gp z+KQOE7VV7j3!G9m`?CA$g%>k4x^CUPDJd(P8o8}4q(jB?X3n>po73&)=O-jQ@Q(j= z>B*BPU%q@f+ASV>Y01sAEN%ivGE9O*N(%}M3=9&CX5Py6lhswvi8x4*u$)O4oLtCC&j_iKu4YW_SsJ3D{h z&t-Az^JhN2y7)9_&!QD^>)iuQW}n@5dzxJoFVEpOWxH?Y^c{a}bKZMW$)RrH&Lf-C z&p-OOLxF?m@SRZOb$4y_c8^I@QZ42B*_#i>=uP)f zN!mE$_S=1BZ*TQVo7X)ruUme!^^VoWzbs}_y_YXvzJ2@lUyWVtjfpcZW~4-Ji_t5V z5$|;2YGta3-GBeQLC@{C|L#1u`@Vbs@7>16#ut9x$eVn=?VY+o*CMOAeJUsC&Ye3E z3_zu%x1&JI%^Wq)qUvgE8NSygR!s^#Y=`gvdwaj`sdl^~htuk-SzB-Mz1{w{4wQc{ zztoH0_vilqzxS)Fs}CPO{5PiLy@NnYh!$&AxwN$OF!t+d~L!<~16%E|zZjzv3m?%cUygMz1!4@Ylr zuOZLpM@PFuw5Iwk|IPBw!(#5auV23wSlF1FhVnM1Mov@VbiKIEWaallHHK2=#si<9 zpATL+MS(+YGUt&--M-yMh1ff)Fj+sxKNce9dAezL(|tk0%&d$ndSc zT4iDCdU=Zi2dG|qQMP;e^5wf9Grv`2n(d=D`|PvNKgGnw_2c*PIN#f(^ZfJA8aw^2 zMH*c{Yve*Iv<~yPIw{USe>-QJR_nIk8H^&Xnp3^(=Ie`aEe%@fp|a>=M#QwQ6*iM6 
zPv$wiK%;8P{ItzCGfeVwb6cGjo;-Q-orT+1fyS*-Z{NJhv6}0rBGl^S7&s+HZ@S;| z@bGZQ9fz~F_Vx8i^&UGczgER_XH4Jo%2~^PZ!Ed3c)G|y!p6elfd!wP>E1YdGqbXH zcXkG|v(5E;e&)=XckkYPetup*cGr?mawoMeI^Xx;IUF&~bf(Y7h@9G5TM3>|Ht{-Q z#XDovCOZnS)actB;{U%u=k&{xsK6C!v+UqPDMx|LH}B{t&E>pc z!McZ+g^8mnW9u#d`d^n9FJ8>z_`srW*gm)#5&hU*UoP1-5u2DxM#H zb#2l0oK&@UpZ|kk#nVO_2~Uc4u3fu!>QqtJz&U=;w`O0Dn3gKhw(R9CspS#VWaZ`K zBO^PHCh?l@W)*P#`111d*RNlHtdN=Vy1M(og|Ny`DfJs8o>cz3QQuT~(L*I^`|W*GvkpJi)4m|=45 z=7)|&E5cTbx_)ftx2yPk*1X?t*;P)@Ni}xzHs$l|>*x8XY5%_x&EjS^Q*dk4+D?~8 zH^dM4%?U3Frv7KN3-P^Y(H~qPk zUDbC$L9(`>;KN0C`Mo7<>lnPe)}IR3?3#2o?efbl3LJ|x!q#tFs^Mic^Gep%zg<@n zMY@k3J$lqZVZzxoZ>Cs7X&!s_Ki{^xCuMg3hdOFoLaM$AQ z1#%i)oA&MFo3-#`Ma=Tckvd}QuXE@2`|dUMoTSn9?}@to%=sV?&flIIdGG%H{~!D7 z_iWxg`E1%k+lNo&U%E0Wb0lqzS{tTal6T<#1eHt^DRy>t$7kQK@PExP2?`}_O*dnz~g{+gt) zK)vhH-QDH(|9(7<&|%wq=W6^ycg{Cot0tdR3275xDYTlabDEEb=SI%9wED&a%;sOk zv|JVjeE9vhNg-u(S}5kv+_PZ<~eMz{r1|cS&J`bEDX4iVUk-pPpfOw zrcIxIe0&_Cvu*!={=IDJdp-JvBpe0y#`z1doZghSxw53>N{Z3eW!w3p*6d!~|32iz zra325F1;+#5Q&P5duG9R{Nu!l6K9*}8}Tr+?%Xz^UFBqFXD6r~WB==7|Gqy@_2q9% z-?w@5_ARJ{6ybXN_U+N6jgzl_^K*MIbR%o4)!cI~N)L;6>WF!3?|0+q_6qlgJZ{zy?d9pF=EaCS$n+`cHgzLw*LL&V{)KK^x-&< zd8SJwxm^Rh7HM=X%9K)D8X(fj_CEPN_adjAG3z39u9a1-7rIb8(Qo;Dn@SQoWpoP6?%&0!YD4?k-zW?XsxSyxwg@2b-a%)aO~ z3bHh|%`<+=lf~v5$ku!?X=8v$==sl03LF`Kuf8m4*Kd7n(Uzz`QTVXI-njDyJ#LE+ z8t@!1?A;ZLKE?-#7o)S$B{bCt*cC8FcBdOl@r?(#DUwT<$+rI8_gWa^G zjXU=2*|Tex7DtoLX@{PJ2OW3cz4vJI)0qweEtg+@`Th6w>C-mn&ASdvQgIYDm*DyL zWx4&`yLUrHmR9ngou_7SdXuoQu-*Lr=Reg1XDrfKd@*D5%`EQOfg)EkOaz!;fBhvK z6fe={SSjo%@ZmgL^TVZowAvCMbV)lUxCU<9z8zFw#ahlk-{0FCxi)O^MGF&?C6n&P zt2Zywn0`7`L*(1{@2~F|uh8J)c;UMEV!z$5jLOQNPuP0g7B1)HS$k2Ut?^OhVST=f z$!BHvNVGBXE50uNWAmXrd;9JGS2YtaIS(rXez7Hom$IY-va%eUy0=4lIm85j~5 zeEYIX;FPmx&z4xt^;<43F7CZ_(#@Q0QETP;)ARE9{MhH-n7im=hS$|fh3Z9>nospbf+q!=_XO9AhqkyrovHj1J{+n**EVfEsD8Je4IJfX= z70-FEiu(WF3_iWebc@y7w{Ht&rk{Rl_veGNqW~Kl+nqdf*FbUoxDpGQ^j%Z98XxZn zTYdND&7bY{Kic2dzON2dyPc5#fnC1l!<(C%Jya%nEuEG7eD#IbEN*t4rHM`3{g{MV 
z5;sO<73^v6RrK8SY|h)2b{ndgo`3%7qjtFP&fVSR#xs2u23*M6>g?>idGqGD_0v^O zzLUst3!Jj-WsP0^|9^jT%ua85v(mok|G&TecE3CXTAG`g9R;Eio^ZYpmT(j}^043y z81(e_U!QieL2CWkw8fdb=EZPi+%#MNf=O%Vg)4<`xEHkV3(?y9{a*D%50>MFlTOHkXJt$61)dr+eYzG^TzeO@HtcnQ#e&Vd>bxC}FaGy>PQ&8P zjSdPQK0I{pd+fDzk^;xhm~#&c_I$gQ?Y1~@{dMO>`&O!Ll{NUy`!^_Vy|`=Oyyxa- zW((dHrlu}c%WYtalJwdvEQDNW=!UTAavaK`bgtHT|4te^d@e-2ms;fEh9Hr;&l=#kQ4 zgPk#Xxw&uu{r#=(y=DbtEt4`wQ-+Du_S>>NY}%8|O?-nzSWAu;%IsQ@HNnwFTg0`I z@!~VV9RV6Ty1EDdL|Mq3OEF5@Tv=9jEp78go!2E+yOpg=)f_&be0;ng)Uo|{-2Pu7 zyXUempj5r+Vup>Jzlu;~bhNez*9A6?;#F(?Z#X=UJ>34U`{<)vw{D#~=Vu{v@6Mf` zz6AKDHJUZH)wplWTN2+3XU-QY7O%XaLQ-TCq>>9247o3=QGDT_PiR|@z!#EYR z^)`lRDRLy4NX?#mx=};wmip`G8&o_W7F6Wt>x*#N*8Vb)>V0I9w=qI5c2|ng%wxxo zCw>ff)_L#Zz@4HX;GrVaq~MTUzPs0oxg%~`xh2nwRi7(tcE-FbSQaSI^7QG`zkmP6 z=($gNV*P$k^2Uf+xjs8%+Mc`SHmo%~m>{rAR^GmD&%S+rla}1fnKxs`h1XvvPoC_z zd~+jPxlBrA-|@#QL$s!!Zaill()OjQR!6MaZcf?l2+?11KmIo)&N!PUEiYgH&CSiA+IVLS-!$D@Z_Anw8km|s z{aPhGyY#iil+&qwj~{)sII*~U(TuZc+W)z@s?%@nzZmz$_Je*y;*6U)Vy+(__uI$C z#o7I0idb@m$y+P1#I)D(g!(DdpEY&)`S089|0G(?Jytj;QKCs9$L#jn=meNU{*-@nawS=tXj{90vhX4d!jdXCxdO`9&w zI=)%w^wl%1HL@U9AWTCiC0bkW8L9=2wK z`_oT9^_wd!c<&$quX zl_`<2+D7q?D9b~Tqb+3G4)8lpIC;`@{dI0r1BtY(Z4+0yt_+Eah_JA*(3pO@)k$%# zU%2HXb{0Ri$+8pgDGRW?oRYE7^uo&$^)~`jRX9WRqYm`O=(Ruo;a%jrNMqf)b%$on zaZwWNKmN7q?}ZB&o;*=Gy{T-stl0GLYAKO@Zj0;o%O5icS}CH_XRGZJ_@q+KY>&yE z6UUky1eV0;t#;H1P;Jnx>3baav?VcSH|t%#2aOAbJp!lv{rmUo)vF;|TX*i1GWPgQ=bmy2liPP)aq`>vh2 zxp@Eae$@iI`SJ1b<}-aZZ{Dn%@vePJk%B;jn(+Md)6YNWWjp-g!-oeC9z1!nByRot zoBV#$p2q00zY5;4=;@1d&u0nty2a{+bGQ6^eMEm>(_aUv1s5|MtUrb7Kd7{m>vz6! 
z$;Ozgm5Iam?fdukm7iLkS57{8WYe0{Pd`=IY~H+i@5`cx4&0qd0S9vK?M(dMIraS9 z+{>@O?)^EZb>W@8d-q?&!%l&(0A#I^wsA0PX}_hIENZY@H{^^ z*IPyCxKPBjr!{u>KPHGWp=TnT92CWRxkl<&zLKvf8AonLpOE zJ-+gq`O`Y?ffB*i?&d#oTIo+1abJC}uHCA)OikE#5m)mhO%V#d@tYzot z+wb4EFHUQ!UhFQGqJRyaYtEe)GV4j*eDnMLdi#kU9+Of`q}0{a&ZK?*@L|KLU^U++ zozsgnie$F$-@kw3#y|~`uA@oWEf@V8G`gA;-n@NlIrki&f*dql4LO-uA-_{u`K^r<=`|PS00&oc46> z+O=oT`kqigNnCYsM-;uxBBX?ojXq&oJqI)JX6cw$u%%&Wr&50pW0-*`TEmOXKuaK*x1M| zuD4>p>H3EgQZ2>0k3RnBqc#~dKzC!~QA^Xu4<00>q`1sA-IF}W!CpL;v1`%f$&+XL zuvLBWQ!P-N-08Mh=6G&io}7%#fq5IX!`D^V%uC+hTWa{}XN}R!GijS=`lO|&v+p`} zD>tM!KxAoz&Z^1c$14&dr~M8zVQ{_p@L}Tq`~0&`uCJQ8NaMm!j<=^;ofr>qIC=8q z%a@t6mTis7?RPmnfu;53&)(kNw{LSJK3(|sSGjaifX175@0!#m-l{0HX{(U%1sl2Fc z*dtP+uLUK50}j1n%sB1{lcprv0F^NZpKztQk^c}zI{7({CMMmNh+L82MT4j zJo{wRpE%?A;Irm zZx@ry@wa8WZ@-O;kJlIBnxr!Ibn5>5{~sJ=p7&g{OG;jTd(>L1xqXQ<;v|-Zi?Ft^ zRLtvlTYNDi;1GO&cei-|amlvEyVu0J zk3KD0`F7=<0*^^YKG!S^Xn3|{>(;Gz@7^twz8du8bLAwJ(25!LsYOfU{hjOr|F-3< zTBI@4$8GghKY#!J{{HO_vQfMZk=+$Gd`SwePL8edkFG6<(c8Y!@OP-iyaNS1c^frX zIV}umcr{UhL*a(Kkx|jpQ=$=*uH4-kwR6HlC?7y$i;WYJ;g+O@r)?0q-KZ_cI^wShRQm^8RGr(0JRuyzPh0beaz~B;L4mDM)K-*U==u<-xo%c5@GC z<;bs=+whE^YsQ6$cz++pNm6Xhj(_txcrLto_l}K;@kYh8vuV|P_(FKURP8;Oupo1m z0>@mx^oK5C^&THH~^+^tS})3X=!7(dx7RkTt~ zajxI*zjYc+;9172{gYQSmK=&&du`@S$=!GB-tb7aCF-1hRkAB+<&sHLWW$F-R+DZ_XZWu*=L^>Sd>`F z&dHbMJ07|JhDO&cAGPL#46QwCGox%*ZH?o-cqq}qReknZHzh%fN7trBtb2RyY0=K3 z`5b@BF1*^YWy=&FwT0XFJ^r#ZihtI!%vos?#}W*De0+HL`5#-v6`S+5C6?{3Ei3!> z@NoOlq>USwU29my!87YvF27Wx<%0hLy>6FZmPDU>{vmJ6r@o~@nc#_w&o=z&zw1t4 zIjP_|DN$l=*y@WJZ_0l6N}EgFsAHa8?(pdMx3{<7=Dx9PX3twM946BGct?f_*I~m_ zv6fdXJajJMm=}fzM^X9>Wjs^R9 zzuPW&BirfHbu_7Z>*NsMgdLfB)2%Hm7-Bs-Vxuz;Z)|uKVth^Wz%|hq{(NlBzkmPM zy|MV_%Z3`GwnV+@*;!d#sgY~LR$qQOYx<6hufG}_8*kv)wzq0=`VZddGjozQO7tIp z_z2Iv<}K4lMQr>$PLXHLw#4YEPxidA=qUG#DX*SS_GdjK1e%H8axr7hym{X$Y$o{U zoff>3wDHWDGdlYE+r!s>%4u(D%iecbFkIwl(ng8yqgJPXUJ2NtHPy@7+WKOK$PqoG znLhjLs*NPt6g`DpuAZN7zkZS+)04>>4U1N|C?%TBUK^GjJx@9P^8o?Z>;E@2>M5P@ 
zRu$r$_dN7oLXD1syr004lk;VsoNqpupf~+-rJVrBk)(~&ByaBAaQp4#$&=M4b6zug zU1GJhcjAGVC~*H$tlRZ1i<^|7t249P%8(`}y+47uoU60~HAF7GG;s|)Ubi|WvMqU! z0>|&)zi;2Z?NIS`KHn|Y87xizrFhr`-i3DVw-sY+e)#BUxBq;*v(G=jy>!#Ec-~&K zO^lhRpB8CNeI>=o(V^@TxaP{_2l=0V+7$f$`1ttWM{hYBjs{BgvNb(vY;2S^&nuab zB*>%gC?X4PaKFyAFmBB5SdC;pRdp)u4TFqHyV8Vt79jm#1p|;=BTf! zFK4@JFJsptx5X2ey)3ZUvwgd{mDR0B&jWi|?4o`&omIY>BNj73N^NZvv&XsBTGp%B z4p=y*KEM6r$Bzpa0^F@{hckLzKAL1W(SwJVck7RvA>54%`5EUJyxG>4=RRTHtE%bM zA}o#>Tcc(zvtHeIm&bsI`Rre}#UEc?T@4x;<`7VS=k<+=jh&sjkwN2Y8ABuE(WH$L zI@7#VZ2x-+p`Aw+0nY&`|KK88aj#B@HEbVwW}48!cPD+4E^p%nWFZvHA7)bSsbsv3O zCe70(r}8$DhmQ}`Qe0A{{l0pao=HbS%I2H%Y#;68vgpg+8a4Z@nZ3Qeo!vYiwf&#x zzQ41(eEr=rQ&tmSX&$y4I>uI3R+g4GdkSBiYd!t6>-gi@vuD37iCTVn>lx1ZP7jCyg zn>o2#HZ(REdUh>hV`tZo+rzQSM8x%LWwlO&Y1-zSFH4R+{ut(ValgxhqMbZjV}kOMuAMn>jkCt8C`2 zuX(C%(HFi_#b?rzt68d^n}RufCM~&`VKUQ4=k%T*W0rL(ON^fQm)HL;%Kr`I^$+NEGd3|fV$k!e+tY0J*$k6c zCA(g}d^vIA#Io6!89kN;$sB)Lv~%gwrIRBMf8KNdz4`33x&g0(9v_-=joCGDu3x&* z%zeMEu6NRQ%KgaSwP<_({kVvTALsx7@h{C<|N3iHM6iLCm6e^{y^yz8-!bHDJejsx zKW-00<>M^7o4vigA3uH+^KkqArJeouivpSd$ISN`T3M~SnDHv#LqbaG*CFwJ8}@;~ zy0*%Mh*EEXBR6i`IC6yLumNalm2ZE=?zO?1DUofzPXGJ&@A7hg$MPVNqsF#VSRDw6eyIe^Iw-jFtq?tFKj?Z{7(}zthc9_xJ1dg=!{NR;LVn4&RUG zIjke*{d#Jb=cFt0ht8ZibKro3y>h6n(aba5;`#}*Z0Gu2ujKGjn=EsDqt5LR^*h2W z#|+M-7{%_XC@d(rkhRs%$7h4i?LGNsZHd>{$H&LS{JD9)?%Es8`U^V_81Ni6SbeqX z=_%38H+kycyyM*0#;_!r#qq|?o0b+9CsK@Fmh1{FoUGz`G->0_Nf%@N686T~+u7x9 zzr8o^{nqU3QJ431C*HVo=g#^+r`F%uTfM!qypeO?!i5VrZZs4qzViC(-=2`E4X$f9 zM)ZKkMG6Wgc&JEKb61!=`TTQP*p=O%KdW&ZdRt~K!`FYj`17-~*RsC}u_SK4J^SNH zNglTL!w(BAzEtfETfH?x=d4|7sc=Kc#|oRde(&GDbyb@9OqAQxy~r z4(lq>IQ7&h=kSet_wL=l-@cT|cd9vCBm0G2FNA-nG&FSI%rTpNcK+Wt>FMd|x8oEJ z8oar)v-tj>XXgL^oc}-P`A?~J43e5NCT+i&vwrWlsQPEy*g{tAsM}wkoUA<4XWNf= z8}8lAFfn^6ChMOdbKF<$^4r_n_2c%excqX~+%K!;qSlt><@GIld5dGs_1F4wdn`;$ zSh`&us&+keD>HOhd-nbJ_3PGsvbjHPYyTpRG^3e!@80be*9RHrnILmK@xa?&^LsmL zf17n=uQsSpHk-}M=4^UeGQrjK^rqzGpS#+2VRw|DGYs zwU;IN`~QA>@Ic|Pf#34WFH2V0FIkfLD?9%8ty8B?UFz_8@{>%DR7ufNW9yqvS$ 
zXefC*dbcphnsI^zKTDd0N%$YZLE-ULbyUc9ol_6FZ7AMj+ z-^?+S;AuOYSn5~1w(BmT;S-0xHu*xmph^P0m#_@V%@y*>nB%`+F)sAM2H#TY6+In?zeS|7&TGz{$FTdaOneUI$+WE6{4GU!E z&z{|VG^tZ;{e6z;nM$0=ki$Fhr|J!fBC9h|Aym7-Eo`1_kS=<6ka&mI& z>h6J-e3h-fdMn#iM1y0q&TOxxzq0iho)%T+=ITy9xgO>=+vz8WcFXj|N41^S332uZuaEwwaxs z-NTuKCsnBZu%VqDXrXswW22V%#4l1Ze8^7v>tEwO{#_m6Bzbzggt- zE6earKmGI;N6(@amtSs)(7BiqGVQ4pXU`%HlkZYIY|AfKetvd#e3E7BO1tXSjJ}s+W4`;T2>H*qYh`tESJ(Kx;=#YazlDW`nVFflo?H^f(Gw!? z)Xc%=8ue}Zy_m0GXpeOgqNBAvFLlOH~n+%=xVKK}mIHgeNaO{IEOJmu^EeC!t2 zZ#tN8cG5+Kr8!13>pBmpc>4MJ_V)Df@bMk`5!75a(N2KHQS1JX&(F`dA5IiqWiIQw zGNkMHzo{w+dy|b(P^W)do*IVzN zNZY*k>$PZ>CIyMMoiTd-#~&B%gsdyCNSwwn+=JEbUD(NpL{*d&$6Y5n^c4sSR;UEh82#kL&3EDrDz$tDGd&eZ`LTPnCc z*$=+{+B9W*-1>BhV|m**>r^k(PLN33e*0|NW(AIyB~k0IUl;qV)#dc%=4}m+Ni03O z(@!r~Tft|3dCRdq!W*`2+qQW#^Y48^oi6qvc3cq;vJ518+#a7f({uXis?D5985tb6 zKK*+d|L>{v{U66xvo-j;?iXGh=ddE?VeZt&`y|>dW%@EqW}QsAWu&MBT2y)G{{82b zc4FN}lQv$vy&&qqyVNEH8NTC3yT#Xrt&UsYep&Sw=c_%eIhPt2zHm{xs9vy|(aGRI zbbz$Y<^IPOc7}!%ebg%F<)_+e&IcumzQ><_))+{%7)P9bx2|Z$*=LQ7j1kYc3}&A- zo4wXEQ0qms2v=)!bMwiQCvUyGr2b*%LQq@>io~Dto6Bj@w>{_i0u@i0Ck97p1++Vv0pTi|x(2k21Md%=Jq*k;*l@9T_@Hn)`;kWcSgs^77@&mSqXP$Y_g< zWp)khI;!*~!DPZN(FBQQmtO{mKPRufB(jfhSn&Bj*Bb0%Vj(!nMm-kH6P5{ z>gVroEyLHf=*s2gDMm9FtbAPm?|J>PW5@Jjb`)IG{=)gks9}YI5~KT_XQ`cmBBi_U zDl04h{{7pu{i(nW@$RDz3J*U2oOE)D0*CGFS(jdao!~4dFF$|YytGI$H+QK?R(Tc< z*^-A1_TQK9bos@yPJHSVql&v?cP?g_@bU8>F3I4vU#H=vH1WgFnwKS3Q@vJJo=*Gh z0O~=?N=RII{Z&BD`9Mr@v&(4)W6sT+H=mzp``BV#+s!sR(BkNn$hLc$A9k(XW7@SS zYVEc*A*S=6Z{NQCvBGAi&$6JE=WboQuHo=|J5diBm;?_K9kKgjm#jb*VO$r^} z)AelY{#eNMpWYPXE5N9ncQk3^a}FNUg+VJroPURWb_l$(ani*Afu^I^q@|@-hO8>* za@wGO>tVr}vuAI<{r2u!g0!S$WNhr-^7r??zP_HGmge`=Am^vY(xBMieKKrYY?U~e z-nzefsH?M}F?hlvjrV)M%Pm=u!05g(prJ{iM{>*U8_$@6j&D%SsHl)&d!+sL_}gvk zUN&5NF?H(H<^J=l_TD?pni%n0jRUkYylfX}k&{e~>aIBldb6r?EDU&>%PM=_n0u?u zX7{R`^ii99`sugt-?wkuRx$7WJ1srm%L;LfXGIO(F`s-Ar|>rMfY}4^M50Te3I|iv zi%&oQyn6NO*|TS>#i~k-8~Hzn@dj5v$meW2VBoX(;+`uUzJVb&a`o@`es|!voqxW( 
z(9vW4_19mkR$sNUw6rubVmiMmPWf%30H`O(%XWBg^>?n;rn`CTWxjB=9^yPXah;NZ zMMTE;+N%b&(G0uO3S_Ro{wme`?8bEqnZDcG^Y8C0cDD`w*UPj;{<6ti@Ot$`tM%Rd zTedo`zaGD%;NiW!)!f|NSJ$(=nP)CIFXBM-RF~5ZQ9Oqi_!VqwKUi3kkZ>TwWRl8F zR`s|^DhqGkNHB0wjGMN5SpIUKQL8e7qCx zu2|O3?|Sj%$&>bf4%`1(z5kc?{`>c(l%@G3zA$RK^&dC3wqAYtWt8#U>#Pl&myLTn z$_*O*r_Rf=1@**vmNaC4Ft(b@_eI{S`#{y+d5bhApDcNJh;{j8&Zft2%~mNdyqJ+= zcKh79bC;L<+y8y(AO2Z+uKNS8!^x(Xf_0fQ1ShGiy!`Uj&2?EOQn7m1&z#8>|UPv%GtL?n>bH%&>NyiCYr&+HQYwo`L zZ*6|9_fwW_x8L%$AI>m|(h=)Fo@_MpR&L<;hD9392LnKp<~oNBmdn{*@k@B1d!?xR zXwvSpKUo7rT7xFIEzW%4RNSPHmX`LW?DwlzS`6pEUtkUw%^&KzcFt8 z|5xGrcWvCrI5)y~*J;j%Rn&$X2(*mpH+YwpX7rV>2y ze;$dyzrTO~=FOLvM2Cc3H{vliGBPqYb`A`2josS6joEMi@^4SJwjT)KYKmI7s)GAx zipKM88z1wFt*@||cPwsNqJ#zOi?6@B#r6C8`^zs!)J$(ol#n^zd{ALdYTVaN(}W~e z?U)c9=D&i+Rzkqwb&1u~o}(Eivl<&2XDySSdR)mcXxd~$S!YF^JBxd$p$j@vuV>( zBd2|>+FQ0emegsBq!xl1q$??}Jn4F)AeL892fB!@DmFBK4Ue8Hi z%k9iWSsWwQUdwvsbnvFran{2I0$SHMEMLqYdN9EtW>?9}W`4T|7U%M|%m0qeHE4{F z`oC0eJvVc$iLZ0wX5~w3Hyn>$etB<}(~h9&*D_3|>&4ERH;=7X?nnM>-gk>PDsUWn z{84~~(e+xI;EqQ{iM;`T#dCBVYP(cCGt`6Z^R*8cyt%eEy8h4c`akCNpUw4kAGEP5 znazHCX{mSG=9!5zzUjSuwb+xvaPr^3wQY%R?(Y5j7Q`{h$NgWrEkm-dQs(%>{cp{d zi@o?-wf%OjMOT|H3tv04$iq0%$gdZZCaAppSn;K5@A>D=5-~l+h1``6=Yf2GEpFL+wD13i5nw))Px-cF5Ju57~$sTCefC;J+}6b(6M93 z;;P@8&RV9fuKxIAg@Qnb+v1%u>&_Kvb}bUui}6sKtnj&P%kFPySk}F`zG6eNWK&9j z#JdFnKSLXeci(>d?^O7{NvEIgdSL!4On}AE$cX8zJ$Qt~-_P%mT5p1ZMwil=rB}kb z8yD3Y&3s~UF2!iCNWsF76?8 zyg6~k#*G`3uNoiNt-xVqVUe-@_TIRBtGRkpy-ubWN$`BK;otmD{c@Lz=lbi?eC_?B zyQ@_j6+PI#t=jNbu;RAt&)HqS-mH!~xZv3Bs>8o5L?zlb>QrC(F30L9U|`jxz~M3L zNYcjU=4KC-NheeMZtQPdq|r0C#c837(#K0ix2AK(2v)B7qI>=0ZBq$_4Lqz1`!t@+ zBL6U=6(r=^`q`y9Ey_rk2a?YEB~KmPdRkIm`lFTMP- z`o%KPQquGH?!_5MY`K}!KL6Q%g%94j{aOq4YL048uAO`C_Nkv@YGSK<6ilSfF1P!- z(nV?G)~!n?onOk&;us*(+L~i3uDS41rh&wls=uF~pLgu}l+nl8<=Nca-01LNX~B#J z`_%zeTv9y=9-=LeZ{-H|u8>L9u3Ra#?!YFUHzon+3zr>lT(sg~!i?Fots@N=OFgi# zv#`i8k^27q`{Ii)?%lh0F@Rg?LyVp{51YEWx^{`eg13R8ov9P%G2JT*dw<2dAw8h= zz}F==O{93Ygl=>BOz*A*aqH*Lo%^-IhOIfVwDjvz!ypmYuT^^&X*4<< 
zc>Ohuqi5xE9TwaEfUiu3NBV>v*1VB#xxqN?T_{)V*FSm}-oE)-C7qEg|FvP!ii;O7 zw!NMP85x3X#hj$VU!Q*L*s*hSt=X*)Xw~1W^VKN`UeJ6o>E+5_g*SF9aWK8fz0@nw zW;pxo*&P+~>8x#u%F4=*A3r`@V4&jJ*Vnh}Zk|V8>)}KT^USSL-A5OJ79E@THVfxT zHy(_1oD;Kg+2#PtYu6;@!t1Sa{wiKm~Qdb-JPd9cXQ_umh5NOdjJ{d8bzmU_;q zw=VVFe~Q$0>b4hm%~Ir#g{=ixbSBN%Q9w)P_8Zv;7It=a_l)+kEWTLr@K9@KXXoF) ze|vj-12kF^C5{(fkh;LdLAb2bmQ$eG5==A zHSA3-O$s_XIxb3r(^A)lWp8$R;l<3hWJ>0&wy3=H)Txcz1J16ppL|DPg38R(Pn9Nm z{Fk^R_Q2v@*=~0?w? z1dS(Q`+nt?hnX@orEq)=&9+bZ$JOezoA;$Z_D=o`}Ml^;z`i#UiVQW6O$`?-=iB3Kb$dt z{`IV_A1n6g@yBEuzq)hg)WfYjS!!<+Gq&D(_%N|+(XCB2ThbR^%y8&k^Y*^*wAA~R z&*y%>SKa@3M^prRy3>Q)_O??UTv<+E8CDg(-L|NmVH1~Fx2x(+ffI(gd3o~=&N}ro zVEy&kXVVs6ywSO^D3wKgOWM_%Yqg>&pczKC=7r9uU(V&2q_TYZ^7yTC3%?pj^@2`0 zNKZ@i`+i`m!sJCOzP)?1TG-*P*ZYv&XDy8#7`DXdrEk6&w6f&qr>7xWp-Y!U%9l1R zdwKAnWntbFEGwm}%%OC;$Zo#ASohMPml2|T zbN#;G%3klPG_k+NI|JNF=DEU+NV){STl0}bMIp0CIr(WG+CCp-IVj|M% zQnWLMTjz~$fXLFc>25E+R^8ZfGtXP9t!q{p`@#?{(Qa3c8LQ`iJ!+6sRb?f@m7Aac zJzaVhW3OAVj@aXm7GF5F-=34~P!j1LclK0&Zlj9lhTDQn&!jv$5+$yD`;|47mwico z@y-|p0RyRCozts6R4=t^e#Q7?j`g9xPL|;UEPanz99I~n*>QC&S{s&ael$SKap@O{ z?xSJh;mI3ka2=L0bz|N7&?o3+F3XSTKoRMpf>xPK3mHEZp<4Aj4l-qIPm3&tIv-f@ zZMJ%A_ICP$Z|keSGPrt}S}zJ(IR&)S(8p?SUcxQTp=T)ar* zXp*4>kK5vlS(|!Zmi#)*Z-3|h{rs{kcX?Ut?&K{s6X`w*-kW>*Wr^u-m28H##KVUV zU%q^KRc&3VjDzXd7r!;T9vwS&?Bw6)Pm3%icTX^{}moN zjh~NC&aS2+#Mg1s5)YL}pMUbb=e%?Crl*R~*~=e~?f?5W-~8nzT?QM?s~^^eiY$%M zn|}Ifjoti{DG8}D+Pn=}1u8->tsh!3@~%>^UwP`OQCp&kZ+J}h#)vz2?|%JRqo2An zL5B5;?Cg)Op%dR*X@^WIV|(Q^YgO;;M;uKCQoSsWBHc%~Nn9{p@yt6^Yif<%d=<~j zmoJOB9=vp*x3@Rhwa;j#kAlDgjZ+B*CiiZ&#c!ONk^G43Qs4%yzUP%QF6K@*JfZ$t zJ+XBS*Kb}cRs->Xv)bNEgET~vBBv$Jc>DHkfX0_qkAuMTR|YeEtmdwJcv8w__F1!U zU27RjPHs^xXANdAxUhEZ|BapjBB|x&-~WEU|Ngw$6}FD9^C1(YB_u9nm~4!A^W>F6 z#=(8xZe{bnpZVH@|IV?t^cC#SiY#YX1@Ko-^l!PIQ0lxp_oc^`1rO$Xs0grFn!aD4 z(WSsqU0ofucH746NUNy_bgzA1SrI$$d1_?X+aiZaDxuapR~0?5;8SW(|F!Z}aNE-& zOR?@q9WjA37P}jdFaFHI^Xav2@_H4|sm<$d#k@<@=$hoC79JL+G|^*c%scb6xuOq5 
zXH|9QnC;%Wb?Nojpvm^qvwX@0+R(=H{`&Ir&><$3!|_$UI;WcyX80_#Yg~Qg;jypRtI%Vl^7CX_mSfJpW3j~c;0i+j-(A64t%z` zvN3C4g4*PTnX?jSY~#;aDXbOwYwGEz+qP}vYCZJefx_uc>2o83zDn*;e7x^z(as2+ zrb9EV_;)qb*wsHjH~0DZ`RbET`lto}XEo$0wvbtHCGh{%UHnr`6W*G!IND`CNs0V+ zXJ@fk_tIbTA+2BC10xT|@>eTbFO7QDROY@h;!KKB`MW!jk&%Kd3pcK2yE-R4Uw=W# z5;;}18>@mfM2dFaIdbI4g$n^wLmfsJ#YnDS z-kLk^x$Xns3olE0_ZEpW_@!rA$2eSkc(hww=J?q&XF@jb^mAodmohusZca#zRKWI! zVrE=NlRn-pzyI}T&BKBniWfg^3=mPBeRkR9%(Y>+`xnJKal46UxowQNbMvO=^wX0k zPj*}U@yT+Z-<&lwwwTtRZ`id+!%2Jz*WPtHr;jETCMG84=IYM$`StgoSjK$^clXWj zFBKjz=vky;YJWw=^VaR#r&ElKjg6a|o7HFBGGhwgmVaIHq(&E0tJs7kSqoZuU3SLo zD}O)l{`>f-D6geKy1VmT>lSQuelnL&A~s{Y=yHaT%=vb;Q5qs`ZEbO(8hKyW2Xgkm zUjOdUcHuqitaeXNjf{(nyLb0)@6n|5^X>Dy1KdE}+sO+eZ~of7TEO7)vTc{&uMW@< ziQ7|g@bjM=H)b46u*fQV%@Ol)``7ul3Pw6py_ytElCL}dJ@Ndr9cYE<_lJkuH*lJNO*2NcJ6zz=B z3xCRS;f`Bh*Dsk1JD$A`*)lybxKk?PqmWA=Py1oJ`R`9oR-dpSXzE0^=7Vp`%BMZg zWYg%%nslHlo8kAJwPCLh9B4S2v`}N!LAf2Dt~C^0eYk%mufyG>jWf=sg=%=JdI}vc z{8(Wlz;S1Py}ba(mRN;f5i25vR-|V1=J^FP1}|+=`0=-H)-qi&?`F=JiE1k^KULyT z@)UY=sI56~u}%)?5EBa-KDK6lIh%;s*w93qdI^F3C0jnPl>d4~^D0~9w9Pkj?B?h5 zG%9Y7>74d@zk-hBsi#F+Uv@1Bc=`Re{f`ID4hjql9e4Lyu`UhLoaz-Q68iZ@)N)bBT{tAKqrWn7-=O7B=lfR!5Fjr|<9Y-@kQ>sqsbG?b0*{t;S_9-~AQ} zZ_ewQ&%fA>sd+;DVl}OYJGgC~vmU#d&8~gF_j}x)iixM68qMsP6}`s4P3QE=5U*tYo~f<686D zi@IUGxrXOHL?4XYe(7+`^WKHCSAmAHtS?459H@DIPWDBurG>?dnCaCbn{=Lku53%R z+BkDz+6tG-#hxps9k_7d>I!ovmb^_j*Q{E#?ECxs@ljEaKr4Amo;KM>PP_baOWgYW zW6Ls2nL?6xS6n=!$-rXVfHF5gs)QFfxlO{))&943PqfkMh;@20=i28ctxMY+Id#Sr zsREgF6RF*%+SL-TuAI%(iBl^uPD-2bDyZ2;^o((;5oqp3-GAPj_wUOctA4JTkh;@- z(F?Y^-q7xNC4()_W(H}~R5YibZam}btQVnf(H9*N5fK@A^ZxzwDMq>V3$}2}UU3dH z-jz_YP^!i#^hU;U?z3kCDsq08FAdOGvu@p?hIa=KGM@kZcI)!k1BG|qmYqL$uCJ%3 zK<4@9pSxCid=rw|al!el!UTVjUs9z}4cnq`)ZC1F)ui+I@^XK3GqY{mwh48*+?H3( zZ46soY9S-motlyH;?>pF&(F_4URaZuc<}xAa_z>0(qCB`rFZ}EpS0xEw7YFF8`3`5 z?BB9w%ig_vw`{p$^D^uzlWXAOiy2#^B->`rne*q)b31bji-<~x)0^Iu?cN*r{?n(T 
z@HEB{)m!&M1Z>Vnuf6t5pQZcZ!u_jy`}o2Vny=rzapT3!jGGAtf3D^Kz4rC>borlyvaVR`GujUUhJ|2-Fb(~%*`(ey%7Fy|0>Ev<*jr)K#-2VQ^m^YNKeRnf3; znwg*ALC5m^{QS(!mlqeizk2yDi*bR7iJjfM5-WE%H@*0MYp%b}-Z$%Df`J52){Bjq z91p(SwtnNP!2R}PMc2_s{&k-wzrMcy|F``A+YUdhm@3aED|w^&hS)c)uA+j15C3ml zZ=bR4ltsh$EziHyS?|4{p(!G9l(!(~j?AW=8H++^o|Ek5_x`rwi2BX$^>#Bqyht*d z>#x^ss3x^tb#mh`gEkAP=PonuESz#lTDNuDdbyPulqI%j4hY z{{Q}W|G$;3t*wm>&)wI?_HAn%mR}a-YCV_`urg%r+O_U|GB>AR7difM*REahbw8IH zY+C8rgR@Aepp}E+ zgYNNb*`I#?xq9{Lq>UC5JhyVT%{I@UHf745pU-B?@Uh!D28Jk2RNzpGek$%csbu$E zx&Gzbx0e?cZ8Dg$_w7N0p7YN?SL}K9>ec7x=d(?uZUu+mxW}b8z5B7nmwL;N=jlC4 zopbLnaBVfanX}DnDHmv$(d*Ug_gPha$;i#s?R9J3X7#YZLOX6|hRLe}3&>L00F_R5 zj;4a0F^ex&e0vkQ_@c&Cui$N4Zn>)ndFJM>J^uJ)%Izby?0xr@KK@|=%2 z)U2|yvW$!u&(6+Pn&`3kV#8d^&qrrI{$lC z)QWfCwG7Hg+%DpL?|s;U*QfI|rh0+Tp!)WFetq4yo9SwkSMJ$kWBho52A9Z>jT<-a z+h^Bt>dcw&bk>Cd5%KZ*6Fq#?f}^9O%i`_i`orVn-+%u3rmR}5ePcS;){JVa1zYNs z_p1t>EIx00{lbL@MLX^Oe!1MvFCVwJ>g!~GyOReLJSN4&#QgdD{XRPz+qCJ^xlOr* z90mB=7YD6OF`9WYrRwR_@Cutay&bkMN_K^4)k@VDl~w+7i}`ul{$-0rU;M5T&!s^Z zGfZ~I{QGu0pP!%q`}gnncNQ<-yLayS=kga=SeY2-pa1@P{r+#?zl(2(_RtY~Y;o>s zk$&v1kjTiHZyzpcIQ;O*N(N)l8CRiOn9gM6<+17B$}!_^b&~3BQ|O3WA0HF*=k50U za&|Qm-ABE=y}yUI33s;Ke_w7Pvv)6O(tGZlIcLh{-MkEV4j0C(4V!OW{_apKxBk8# zi@cVeIvdXGIHAIZ@7tlA{?u}VnP(oE#54YXkzc^-d(G1+@XMp4-Er%`Kc8R!@6&1h zuT_7u*YDjHe@f1Cl3f4t-Mg!cij3CJp1OoZO#pPR)}LRm*K1GpQk$$f`DEE_jY)|C zBDO_OPE1z!|Fn8T-bJbTlmDnszI^$z*U}&rp@StECbQP9TX)|6|DW0U`*zm-wd!@V zuKl$oLg(J+l%tP7E?v6x=g*(_?%i9ta-~g`_oWuU<(ngP_Qu`6cQ4Oq<`l1`5jy9T zPjk&a+xGSB^rf0@t2SDHy0337q>TS^UZVf?blz; zD*qkilsS9l%9Sb3cWWwLFSQ(gc;(8KkgzZ>6`{3Z(Sd;zA3uK^=5SrKYtg5q#xAY* z<=X#*6S@|0G%0u%{hhWfXr*4CjJw5SlS0(Q^~mg zdbGzht&I_Du4Y}$+Ildd;@i#iQmfc~?o3y&T|0OFe1Bixv!h0;$rHLH-HMBgKj{Z} zcq#~}n47URFAUJg%FY(eZjUqwjH<&EC6X$BP1sqWFe~hk~B`GR{5B z`KH85majc(?Y#{fJyj+JFZc8H@v*7+@Zi_i*JZo^uHAmmNUB$J{tOS5u&^*2x&EtJ zvQkn}v9V__|Csj`JZ0yiG%-SF-RjlZxw)aMud+sdYdroqQG)GmT(P8xD|gYq^3(s# z3{FJfcyn>#<(H+UrQz#h0yRXo=iUAF?RI{v)5E8yr<=5@tI5mB}h 
ztGTD2KKpT3s3>8xZP%QH$5yIBonHlIr4Jaa*|;%q`Q_N%Wp8)C-?zK;b(pvJ>A%0f zb2f3XFzsAZ@|I(+U;F2qWUSaB&e!DuEM zJA3hlJ8u@g&0<~|I&te((UFq9Y?4yR-xT@ca{< zW%DFq=S^qEdk$HQWxMZ&t=_tFW#{?l$CC{A-{(L3X8N>gll)>&r6Z+u{b*1=XsF1Zhe57uZGC3n00I7o4Yq>R9oeI zUt}!7!*f_^qKDOM_Sd}67PYprURXMR8N-1-hZBr_kBw%oSqTE33IYidVIkL-e09_O zAo6DM23707so~vix8E;(Z?1Zq)1YgSP$$be|Ccu0Dy#PFk#Q8*eb>&;E-ol&QGmv` z?=~$$HBJiy)`so>{ciWtrAvSR_^_v8k%qu^7bV7PE16Gye^g+Rw%JmEV(FAvq48n=G> z>8D3GPe^FxYCr7xKQ~Or^THoj6(L=H{q%pgGuw~9_j^-g-D+IG-=AhQbLrBh5^cJA zdVapXYuB!QS+c7{q{wc5{hJ$xi!XBAtO*SbJ(@hd$vW3Yj=k0L^}(JA*MdSrQ@2JL z+fPc$tD2&g>8a-PDDOIdl8uE7Ut6Nz^5VimL8F}Db?h4VvbIJo4VpT2>Rg-3qT)~Nz7HTkx*LZ#5#~$7dN0+M}a9U^})w}Gae-!_XrKv`d8X{aQ zOp_;1j#?Y0Hd!@)gTi*MR;J^HD{3cZFTBF;V&tPW(PPcJbxvMs+t|0N8qM@s8l(WJLORVSZJ;bBV#cT&%vJ==OX zaju`b0LKr|IJ14_r8J{Ep7`Dr)0A^J|1x{j{3@GyKHDx@==|O@ zGr_>Z#3W_&O{Iw^PMzX%6nGhzr`T7MFKy7IV9~cZLZ@rdp1Ae5-|+}&N?-rnx+1_G;Aty&V4 z$*3>n5_90Gm`>2n`20%kB|$6wmd~C$_iPW3^RqI>{nFM>0xhSXM#aT_`&nZ)J2x>= zQG{#le}?(zj~CX6s~f!Oc*$Wf*H4^-MW~bIc;U{Nd$(>yg@?N@zs%W`k&*G^QMZ0o zb@lFDyXN>kZUDM2ucI8_SN2rPCL!NHEy3 zXO9dI+uJhhEkQR79{Q*SuME)=;p%lO{_^5t39Cd_W!`$%WC=D$@u;<8TKk=6C0-Ia z__XNu?c13qv#w^9TF8{YyHokFRa{D1np;7O`J3sE3vGuJPs}~1uUhGDA>;RJ_K$gt z*8>Azo=*^FEKZA$zklWo&x`Df877*0ZRGeLU21iDX#IZA=Rcp%Zw8%z-x{;CNcHS` zMNcCE4jnzcydRcJsyJH@O`A6D*fF=AG5czlgqTSE{rCI*?p?do)YZMcz3qR!Q0_iD zX}J-P(kv4mrz5W0*h>RioEBcZcoB5uro(SD2ay27C;7o5qMa^>9)9@qN2lvU5(5Ln zTu&FrkUs|wI2cIqv^q&Yn|Ami2Md$l^lYPork^g9iO;`%+qH$QC3M|?PVf;6 z*R!@dEeuc*dinKN$**}EBi0l@Kc_kMRKNYd8(CXHdl%Ivznqyq@1*`@<&upaS*JHf zteL=fdrHC181P9eRRv5jHmIwP)tk=!=H%zkpJ&@2PBBsy@=Q-(?x8a2^wXzL epU$@bY%bTB&}OFb@d^V21B0ilpUXO@geCwy&rk;d diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile deleted file mode 100644 index bfdc1ef23..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile +++ /dev/null @@ -1,41 +0,0 @@ -TNL_VERSION=0.1 -TNL_INSTALL_DIR=${HOME}/local/lib 
-TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION} - -TARGET = hamiltonJacobiParallelSolver -#CONFIG_FILE = $(TARGET).cfg.desc -INSTALL_DIR = ${HOME}/local -CXX = g++ -CUDA_CXX = nvcc -OMP_FLAGS = -DHAVE_OPENMP -fopenmp -CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG -LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp - -SOURCES = main.cpp -HEADERS = -OBJECTS = main.o -DIST = $(SOURCES) Makefile - -all: $(TARGET) -clean: - rm -f $(OBJECTS) - rm -f $(TARGET)-conf.h - -dist: $(DIST) - tar zcvf $(TARGET).tgz $(DIST) - -install: $(TARGET) - cp $(TARGET) $(INSTALL_DIR)/bin - cp $(CONFIG_FILE) $(INSTALL_DIR)/share - -uninstall: $(TARGET) - rm -f $(INSTALL_DIR)/bin/$(TARGET) - rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share - -$(TARGET): $(OBJECTS) - $(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS) - -%.o: %.cpp $(HEADERS) - $(CXX) -c -o $@ $(CXX_FLAGS) $< - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h deleted file mode 100644 index c07ee95aa..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************** - parallelMapConfig.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ -#define HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ - -#include - -template< typename ConfigTag > -class parallelMapConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" ); - config.addEntry < String > ( "scheme", "This defines scheme used for discretization.", "godunov" ); - config.addEntryEnum( "godunov" ); - config.addEntryEnum( "upwind" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < String > ( "map", "Gradient map for solver"); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 ); - config.addEntry < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 ); - config.addRequiredEntry < double > ( "stop-time", " Final time for solver"); - config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" ); - config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 ); - config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 ); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - } -}; - -#endif /* HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run b/src/TNL/Legacy/hamilton-jacobi-parallel-map/run deleted file mode 100755 index 484419962..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -dimensions=2 - -size=2 - -time=50 - -rm -r work_dir -mkdir work_dir -cp mapa_png.png work_dir/mapa_png.png -cd work_dir - -tnl-image-converter --image-format png\ - --input-images 
mapa_png.png - - -tnl-init --test-function sdf-para \ - --x-centre 0.5 \ - --y-centre 1.0 \ - --offset 0.05 \ - --output-file init.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 - -hamilton-jacobi-parallel-map-dbg --initial-condition init.tnl \ - --map mapa_png.tnl \ - --cfl-condition 50 \ - --mesh mesh.tnl \ - --initial-tau 1.0e-3 \ - --epsilon 4.0 \ - --delta 0.0 \ - --stop-time $time \ - --scheme godunov \ - --subgrid-size 8 \ - --dim $dimensions - - -#cp ../template.dat1 template.dat1 -#cp ../template.dat2 template.dat2 -#cp ../gplt2eps.py gplt2eps.py -cd .. - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py deleted file mode 100755 index f8cde3768..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import sys, string, math - -arguments = sys. argv[1:] -format = "txt" -output_file_name = "eoc-table.txt" -input_files = [] -verbose = 1 -size = 1.0 - -i = 0 -while i < len( arguments ): - if arguments[ i ] == "--format": - format = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--output-file": - output_file_name = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--verbose": - verbose = float( arguments[ i + 1 ] ) - i = i +2 - continue - if arguments[ i ] == "--size": - size = float( arguments[ i + 1 ] ) - i = i +2 - continue - input_files. append( arguments[ i ] ) - i = i + 1 - -if not verbose == 0: - print "Writing to " + output_file_name + " in " + format + "." - -h_list = [] -l1_norm_list = [] -l2_norm_list = [] -max_norm_list = [] -items = 0 - -for file_name in input_files: - if not verbose == 0: - print "Processing file " + file_name - file = open( file_name, "r" ) - - l1_max = 0.0 - l_max_max = 0.0 - file.readline(); - file.readline(); - for line in file. readlines(): - data = string. split( line ) - h_list. 
append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) ) - l1_norm_list. append( float( data[ 1 ] ) ) - l2_norm_list. append( float( data[ 2 ] ) ) - max_norm_list. append( float( data[ 3 ] ) ) - items = items + 1 - if not verbose == 0: - print line - file. close() - -h_width = 12 -err_width = 15 -file = open( output_file_name, "w" ) -if format == "latex": - file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" ) - file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" ) - file. write( " " + string. rjust( " ", h_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error.", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + - "\\\\ \\hline \\hline \n") -if format == "txt": - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - file. write( "| h | L1 Err. | L1 EOC. | L2 Err. | L2 EOC | MAX Err. | MAX EOC |\n" ) - file. write( "+==============+================+================+================+================+================+================+\n" ) - - -i = 0 -while i < items: - if i == 0: - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&"+ - string. 
rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1; - continue - if h_list[ i ] == h_list[ i - 1 ]: - print "Unable to count eoc since h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" - file. write( " eoc error: h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" ) - else: - h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] ) - l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] ) - l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] ) - max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] ) - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. 
ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1 - -if format == "latex": - file. write( "\\hline \n" ) - file. write( "\\end{tabular} \n" ) - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h deleted file mode 100644 index 400e163c9..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h +++ /dev/null @@ -1,217 +0,0 @@ -/*************************************************************************** - tnlParallelMapSolver.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLPARALLELMAPSOLVER_H_ -#define TNLPARALLELMAPSOLVER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include - -#ifdef HAVE_CUDA -#include -#endif - - -template< int Dimension, - typename SchemeHost, - typename SchemeDevice, - typename Device, - typename RealType = double, - typename IndexType = int > -class tnlParallelMapSolver -{}; - -template -class tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int > -{ -public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType; -#ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA; -#endif - tnlParallelMapSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - -/*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map); - - - tnlMeshFunction u0; - VectorType work_u, map_stretched, map; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - -// 
tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - double* map_stretched_cuda; - - int* subgridValues_cuda; - int* boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - double* tmp_map; - - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA2D( double u, const int i ); - - __device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID); - - __device__ int getOwnerCUDA2D( int i) const; - - __device__ int getSubgridValueCUDA2D( int i ) const; - - __device__ void setSubgridValueCUDA2D( int i, int value ); - - __device__ int getBoundaryConditionCUDA2D( int i ) const; - - __device__ void setBoundaryConditionCUDA2D( int i, int value ); - -#endif - -}; - - - - - - - - - - - - - - -#ifdef HAVE_CUDA -template -__global__ void runCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initRunCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initCUDA2D( tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3, double* tmp_map_ptr); - 
-template -__global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template -__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -#endif - -#include "tnlParallelMapSolver2D_impl.h" -#endif /* TNLPARALLELMAPSOLVER_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h deleted file mode 100644 index e8cbc6fc1..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h +++ /dev/null @@ -1,1315 +0,0 @@ -/*************************************************************************** - tnlParallelMapSolver2D_impl.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLPARALLELMAPSOLVER2D_IMPL_H_ -#define TNLPARALLELMAPSOLVER2D_IMPL_H_ - - -#include "tnlParallelMapSolver.h" -#include - - - - -#define MAP_SOLVER_MAX_VALUE 3 - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver() -{ - this->device = TNL::Devices::HostDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." <("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter ("subgrid-size"); - cout << "Setting N to " << this->n <subMesh.setDimensions( this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter ("initial-condition"); - this->u0.load( initialCondition ); - - /* LOAD MAP */ - const String& mapFile = parameters.getParameter ("map"); - if(! 
this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <delta = parameters.getParameter ("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >(); - - cout << "Setting delta to " << this->delta <tau0 = parameters.getParameter ("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <stopTime = parameters.getParameter ("stop-time"); - - this->cflCondition = parameters.getParameter ("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >()); - cout << "Setting CFL to " << this->cflCondition <stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <device == tnlCudaDevice) - { - cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - - double** tmpdev = NULL; - cudaMalloc(&tmpdev, sizeof(double*)); - cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA2D<<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - double* tmpu = NULL; - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - cudaMemcpy((this->tmpw), this->work_u.getData(), 
this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - } -#endif - - if(this->device == TNL::Devices::HostDevice) - { - VectorType tmp_map; - tmp_map.setSize(this->n * this->n); - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - - if(! tmp[i].setSize(this->n * this->n)) - cout << "Could not allocate tmp["<< i <<"] array." <map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - //cout << "Computing initial SDF on subgrid " << i << "." <device == tnlCudaDevice) - { - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA2D<<n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - } -#endif - - - this->currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - - synchronizeCUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - -#endif - cout << "Solver initialized." 
< -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - while ((this->boundaryConditions.max() > 0 )/* || !end*/) - { - -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - if(getSubgridValue(i) != INT_MAX) - { - VectorType tmp, tmp_map; - tmp.setSize(this->n * this->n); - tmp_map.setSize(this->n * this->n); - for( int j = 0; j < tmp_map.getSize(); j++) - { - tmp_map[j] = this->map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - - if(getSubgridValue(i) == currentStep+4) - { - - if(getBoundaryCondition(i) & 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(2, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - else - { - - if(getBoundaryCondition(i) == 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(2, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 8) - { - tmp = getSubgrid(i); 
- tmp = runSubgrid(8, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if(getBoundaryCondition(i) & 3) - { - //cout << "3 @ " << getBoundaryCondition(i) <device == tnlCudaDevice) - { - bool end_cuda = false; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - bool* tmpb; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - int i = 1; - time_diff = 0.0; - while (run_host || !end_cuda) - { - cout << "Computing at step "<< i++ <<<n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); - time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - //start = std::clock(); - synchronizeCUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - } - cout << "Solving time was: " << time_diff <work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - - cudaDeviceSynchronize(); - } -#endif - contractGrid(); - this->u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->tmp_map); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it 
does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <currentStep & 1) -// { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i; - tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! 
(getBoundaryCondition(getOwner(tmp1)) & 1) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); - } - } - } - -// } -// else -// { - for(int i = 1; i < this->gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1; - tmp2 = this->gridCols*this->n*j + i*this->n ; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! 
(getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } -// } - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) < -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." <gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - - - cout << "Setting gridCols to " << this->gridCols << "." 
<gridRows << "." <subgridValues.setSize(this->gridCols*this->gridRows); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <map_stretched.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched map." <unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." <mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <unusedCell[i] = 1; - int diff =(this->n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - k+= i%(this->n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - - if(fabs(this->u0[i-k]) < mesh.template getSpaceStepsProducts< 1, 0 >()+mesh.template getSpaceStepsProducts< 0, 1 >() ) - this->work_u[i] = this->u0[i-k]; - else - this->work_u[i] = sign(this->u0[i-k])*MAP_SOLVER_MAX_VALUE; - - this->map_stretched[i] = this->map[i-k]; - } - - - cout << "Grid stretched." < -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." 
<n*this->n*this->gridCols*this->gridRows; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0)) - { - this->u0[i-k] = this->work_u[i]; - } - - } - - cout << "Grid contracted" < -typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - VectorType u; - u.setSize(this->n*this->n); - - for( int j = 0; j < u.getSize(); j++) - { - u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - - for( int j = 0; j < this->n*this->n; j++) - { - int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j]; - this->unusedCell[index] = 0; - } - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map) -{ - - VectorType fu; - - fu.setLike(u); - fu.setValue( 0.0 ); - - - - bool tmp = false; 
- for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - int centerGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1); - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - } - - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } - - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double maxResidue( 1.0 ); - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - for( int i = 0; i < u.getSize(); i ++ ) - { - if(map[i] == 0.0) - { - u[i] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE; - } - } - - while( time < finalTime ) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { - Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x())); - Entity.refresh(); - 
neighborEntities.refresh(subMesh,Entity.getIndex()); - if(map[i] != 0.0) - fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities,map); - } - maxResidue = fu. absMax(); - - - if(maxResidue != 0.0) - currentTau = fabs(this -> cflCondition / maxResidue); - - - if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) - { - currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - - - for( int i = 0; i < fu.getSize(); i ++ ) - { - if(map[i] != 0.0) - u[ i ] += currentTau * fu[ i ]; - } - time += currentTau; - - } - return u; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - *a = caller->work_u_cuda[th]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, 
typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i ) -{ - int index = (blockIdx.y)*this->n*this->n*this->gridCols - + (blockIdx.x)*this->n - + threadIdx.y*this->n*this->gridCols - + threadIdx.x; - - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - volatile double* sharedTau = &u[blockDim.x*blockDim.y]; - double* map_local = &u[2*blockDim.x*blockDim.y]; - - int i = threadIdx.x; - int j = threadIdx.y; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - - /* LOAD MAP */ - map_local[l]=this->map_stretched_cuda[gid]; - if(map_local[l] != 0.0) - map_local[l] = 1.0/map_local[l]; - /* LOADED */ - - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1); - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - - if(tmp !=1) - { - if(computeFU) - { - if(boundaryCondition == 4) - u[l] = u[threadIdx.y * blockDim.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x); - else if(boundaryCondition == 2) - u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] ;//+ 
sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x); - else if(boundaryCondition == 8) - u[l] = u[threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y); - else if(boundaryCondition == 1) - u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y); - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; - - double finalTime = this->stopTime; - if(boundaryCondition == 0) - finalTime*=2.0; - __syncthreads(); - - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Entity.setCoordinates(Containers::StaticVector<2,int>(i,j)); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - - - if(map_local[l] == 0.0) - { - u[l] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE; - computeFU = false; - } - __syncthreads(); - - - while( time < finalTime ) - { - sharedTau[l] = finalTime; - - if(computeFU) - { - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j), u, time, boundaryCondition, neighborEntities, map_local); - sharedTau[l]=abs(cfl/fu); - } - - - - if(l == 0) - { - if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - else if(l == blockDim.x*blockDim.y - 1) - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - - - if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] 
= Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - - u[l] += currentTau * fu; - time += currentTau; - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const -{ - - return ((i / (this->gridCols*this->n*this->n))*this->gridCols - + (i % (this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8 - -template -__global__ -void synchronizeCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[4]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = (blockDim.y*blockIdx.y + 
threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y == 0) - { - subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - newSubgridValue = 0; - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) 
- { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x); - boundary_index = 0; - } - - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - } - __threadfence(); - __syncthreads(); - - if(threadIdx.x+threadIdx.y == 0) - { - if(subgridValue == INT_MAX && newSubgridValue !=0) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3]); - - - if(blockIdx.x+blockIdx.y ==0) - { - cudaSolver->currentStep += 1; - *(cudaSolver->runcuda) = 0; - } - } - -} - - - -template -__global__ -void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ - - - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr) -{ - - - cudaSolver->work_u_cuda = ptr; - cudaSolver->map_stretched_cuda = tmp_map_ptr; - cudaSolver->unusedCell_cuda = ptr3; - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->boundaryConditions_cuda 
=(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->runcuda = ptr2; - *(cudaSolver->runcuda) = 1; - -/* CHANGED !!!!!! from 1 to 0*/ cudaSolver->currentStep = 0; - - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - printf("GPU memory initialized.\n"); -} - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - extern __shared__ double u[]; - - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - containsCurve = 0; - - - caller->getSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - - if(u[0] * u[l] <= 0.0) - atomicMax( &containsCurve, 1); - - __syncthreads(); - if(containsCurve == 1) - { - caller->runSubgridCUDA2D(0,u,i); - caller->insertSubgridCUDA2D(u[l],i); - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA2D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA2D(i); - - if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0) - { - caller->getSubgridCUDA2D(i,caller, &u[l]); - - - if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) - { - if(bound & 1) - { - caller->runSubgridCUDA2D(1,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2) - { - caller->runSubgridCUDA2D(2,u,i); - 
caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA2D(4,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA2D(8,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - } - else - { - - if(bound == 1) - { - caller->runSubgridCUDA2D(1,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 2) - { - caller->runSubgridCUDA2D(2,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 4) - { - caller->runSubgridCUDA2D(4,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 8) - { - caller->runSubgridCUDA2D(8,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - } - - if(bound & 3) - { - caller->runSubgridCUDA2D(3,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 5) - { - caller->runSubgridCUDA2D(5,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 10) - { - caller->runSubgridCUDA2D(10,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 12) - { - caller->runSubgridCUDA2D(12,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - if(l==0) - { - caller->setBoundaryConditionCUDA2D(i, 0); - caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELMAPSOLVER2D_IMPL_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt deleted file mode 100644 index f6a00127c..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set( tnl_hamilton_jacobi_parallel_SOURCES -# MainBuildConfig.h -# tnlParallelEikonalSolver2D_impl.h -# tnlParallelEikonalSolver3D_impl.h -# 
tnlParallelEikonalSolver.h -# parallelEikonalConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(hamilton-jacobi-parallel main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (hamilton-jacobi-parallel tnl ) - - -INSTALL( TARGETS hamilton-jacobi-parallel - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel ) diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h deleted file mode 100644 index ed3d686eb..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" < struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. 
- */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp deleted file mode 100644 index b13498e17..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Jul 8 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu deleted file mode 100644 index 710197671..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cu - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel/main.h deleted file mode 100644 index dbaebdceb..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h +++ /dev/null @@ -1,142 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "tnlParallelEikonalSolver.h" -#include "parallelEikonalConfig.h" -#include "MainBuildConfig.h" -#include -#include -#include -#include -#include -#include - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - parallelEikonalConfig< BuildConfig >::configSetup( configDescription ); - - if( ! 
parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - //if (parameters.GetParameter ("scheme") == "godunov") - //{ - tnlDeviceEnum device; - device = TNL::Devices::HostDevice; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost; - /*#ifdef HAVE_CUDA - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; - #endif - #ifndef HAVE_CUDA*/ - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; - /*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <, double, int > SchemeType; - - tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <, double, int > SchemeTypeHost; - /*#ifdef HAVE_CUDA - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; - #endif - #ifndef HAVE_CUDA*/ - typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; - /*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <, double, int > SchemeType; - - tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." 
< - -template< typename ConfigTag > -class parallelEikonalConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" ); - config.addEntry < String > ( "scheme", "This defines scheme used for discretization.", "godunov" ); - config.addEntryEnum( "godunov" ); - config.addEntryEnum( "upwind" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 ); - config.addEntry < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 ); - config.addRequiredEntry < double > ( "stop-time", " Final time for solver"); - config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" ); - config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 ); - config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 ); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - } -}; - -#endif /* HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/run b/src/TNL/Legacy/hamilton-jacobi-parallel/run deleted file mode 100755 index 3aece294a..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/run +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -#GRID_SIZES="0897" -GRID_SIZES="0008 0015 0029 0057 0113 0225 0449" -#GRID_SIZES="1793" - -dimensions=2 - -size=2 - -time=3 - -for grid_size in $GRID_SIZES; - -do - - rm -r grid-${grid_size} - mkdir grid-${grid_size} - cd grid-${grid_size} - - tnl-grid-setup --dimensions $dimensions \ - --origin-x -1.0 \ - --origin-y -1.0 \ - --origin-z -1.0 \ - --proportions-x $size \ - --proportions-y $size \ - --proportions-z $size \ - --size-x ${grid_size} \ - --size-y 
${grid_size} \ - --size-z ${grid_size} - - tnl-init --test-function sdf-para \ - --offset 0.25 \ - --output-file init.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 \ - - - tnl-init --test-function sdf-para-sdf \ - --offset 0.25 \ - --output-file sdf.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 - - hamilton-jacobi-parallel --initial-condition init.tnl \ - --cfl-condition 1.0e-1 \ - --mesh mesh.tnl \ - --initial-tau 1.0e-3 \ - --epsilon 1.0 \ - --delta 0.0 \ - --stop-time $time \ - --scheme godunov \ - --subgrid-size 8 - - tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff - - cd .. - -done - - -./tnl-err2eoc-2.py --format txt --size $size *.diff - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py deleted file mode 100755 index f8cde3768..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import sys, string, math - -arguments = sys. argv[1:] -format = "txt" -output_file_name = "eoc-table.txt" -input_files = [] -verbose = 1 -size = 1.0 - -i = 0 -while i < len( arguments ): - if arguments[ i ] == "--format": - format = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--output-file": - output_file_name = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--verbose": - verbose = float( arguments[ i + 1 ] ) - i = i +2 - continue - if arguments[ i ] == "--size": - size = float( arguments[ i + 1 ] ) - i = i +2 - continue - input_files. append( arguments[ i ] ) - i = i + 1 - -if not verbose == 0: - print "Writing to " + output_file_name + " in " + format + "." 
- -h_list = [] -l1_norm_list = [] -l2_norm_list = [] -max_norm_list = [] -items = 0 - -for file_name in input_files: - if not verbose == 0: - print "Processing file " + file_name - file = open( file_name, "r" ) - - l1_max = 0.0 - l_max_max = 0.0 - file.readline(); - file.readline(); - for line in file. readlines(): - data = string. split( line ) - h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) ) - l1_norm_list. append( float( data[ 1 ] ) ) - l2_norm_list. append( float( data[ 2 ] ) ) - max_norm_list. append( float( data[ 3 ] ) ) - items = items + 1 - if not verbose == 0: - print line - file. close() - -h_width = 12 -err_width = 15 -file = open( output_file_name, "w" ) -if format == "latex": - file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" ) - file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" ) - file. write( " " + string. rjust( " ", h_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error.", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + - "\\\\ \\hline \\hline \n") -if format == "txt": - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - file. write( "| h | L1 Err. | L1 EOC. | L2 Err. | L2 EOC | MAX Err. | MAX EOC |\n" ) - file. 
write( "+==============+================+================+================+================+================+================+\n" ) - - -i = 0 -while i < items: - if i == 0: - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&"+ - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1; - continue - if h_list[ i ] == h_list[ i - 1 ]: - print "Unable to count eoc since h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" - file. write( " eoc error: h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" ) - else: - h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] ) - l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] ) - l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] ) - max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] ) - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. 
rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1 - -if format == "latex": - file. write( "\\hline \n" ) - file. 
write( "\\end{tabular} \n" ) - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h deleted file mode 100644 index 19cdd9493..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h +++ /dev/null @@ -1,366 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER_H_ -#define TNLPARALLELEIKONALSOLVER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include - - -#include - -#ifdef HAVE_CUDA -#include -#endif - - -template< int Dimension, - typename SchemeHost, - typename SchemeDevice, - typename Device, - typename RealType = double, - typename IndexType = int > -class tnlParallelEikonalSolver -{}; - -template -class tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int > -{ -public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType; -#ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, 
TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA; -#endif - tnlParallelEikonalSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - -/*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); - - - tnlMeshFunction u0; - VectorType work_u; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - -// tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - - int* subgridValues_cuda; - int*boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - //MeshTypeCUDA mesh_cuda, subMesh_cuda; - //SchemeDevice scheme_cuda; - //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; - //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, 
Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA2D( double u, const int i ); - - __device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID); - - /*__global__ void runCUDA();*/ - - //__device__ void synchronizeCUDA(); - - __device__ int getOwnerCUDA2D( int i) const; - - __device__ int getSubgridValueCUDA2D( int i ) const; - - __device__ void setSubgridValueCUDA2D( int i, int value ); - - __device__ int getBoundaryConditionCUDA2D( int i ) const; - - __device__ void setBoundaryConditionCUDA2D( int i, int value ); - - //__device__ bool initCUDA( tnlParallelEikonalSolver* cudaSolver); - - /*__global__ void initRunCUDA(tnlParallelEikonalSolver* caller);*/ - -#endif - -}; - - - - - - - - template - class tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int > - { - public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshType; - #ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshTypeCUDA; - #endif - tnlParallelEikonalSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - - /*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void 
stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); - - - tnlMeshFunction u0; - VectorType work_u; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - - int* subgridValues_cuda; - int*boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - //MeshTypeCUDA mesh_cuda, subMesh_cuda; - //SchemeDevice scheme_cuda; - //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; - //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA3D( double u, const int i ); - - __device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID); - - /*__global__ void runCUDA();*/ - - //__device__ void synchronizeCUDA(); - - __device__ int getOwnerCUDA3D( int i) const; - - __device__ int getSubgridValueCUDA3D( int i ) const; - - __device__ void setSubgridValueCUDA3D( int i, int value ); - - __device__ int getBoundaryConditionCUDA3D( int i ) const; - - __device__ 
void setBoundaryConditionCUDA3D( int i, int value ); - - //__device__ bool initCUDA( tnlParallelEikonalSolver* cudaSolver); - - /*__global__ void initRunCUDA(tnlParallelEikonalSolver* caller);*/ - -#endif - -}; - - - - - - -#ifdef HAVE_CUDA -template -__global__ void runCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initRunCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initCUDA2D( tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3); - -template -__global__ void synchronizeCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template -__global__ void synchronize2CUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - - - - - - -template -__global__ void runCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initRunCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template -__global__ void initCUDA3D( tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3); - -template -__global__ void synchronizeCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template -__global__ void synchronize2CUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); -#endif - - -#ifdef HAVE_CUDA -__cuda_callable__ -double fabsMin( double x, double y) -{ - double fx = fabs(x); - - if(Min(fx,fabs(y)) == fx) - return x; - else - return y; -} - -__cuda_callable__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int 
old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -#endif - -#include "tnlParallelEikonalSolver2D_impl.h" -#include "tnlParallelEikonalSolver3D_impl.h" -#endif /* TNLPARALLELEIKONALSOLVER_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h deleted file mode 100644 index 76cf49bc8..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h +++ /dev/null @@ -1,1928 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver2D_impl.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER2D_IMPL_H_ -#define TNLPARALLELEIKONALSOLVER2D_IMPL_H_ - - -#include "tnlParallelEikonalSolver.h" -#include - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() -{ - cout << "a" <device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - - cout << "b" < -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." 
<("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter ("subgrid-size"); - cout << "Setting N to " << this->n <subMesh.setDimensions( this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter ("initial-condition"); - this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <delta = parameters.getParameter ("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >(); - - cout << "Setting delta to " << this->delta <tau0 = parameters.getParameter ("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <stopTime = parameters.getParameter ("stop-time"); - - this->cflCondition = parameters.getParameter ("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >()); - cout << "Setting CFL to " << this->cflCondition <stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <stopTime = 1.5*((double)(this->n))*parameters.getParameter ("stop-time")*this->mesh.template getSpaceStepsProducts< 1, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <device == tnlCudaDevice) - { - /*cout << "Testing... 
" <device == tnlCudaDevice) - { - if( !initCUDA2D(parameters, gridRows, gridCols) ) - return false; - }*/ - //cout << "s" <cudaSolver), sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >)); - //cout << "s" <cudaSolver, this,sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - //cout << "s" <tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA2D<<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s " <work_u_cuda), this->work_u.getSize()*sizeof(double)); - double* tmpu = NULL; - - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - //printf("%p %p \n",tmpu,tmpw); - cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s "<device == TNL::Devices::HostDevice) - { - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - - if(! tmp[i].setSize(this->n * this->n)) - cout << "Could not allocate tmp["<< i <<"] array." 
<device == tnlCudaDevice) - { -// cout << "pre 1 kernel" <n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA2D<<n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); -// cout << "post 1 kernel" <currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] <<" " << test[1] <<" " << test[2] <<" " << test[3] <work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] <<" " << test[1] << " " <<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << test[0] << " " <work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //TNL_CHECK_CUDA_DEVICE; - //cout << this->tmpw << " " << test[0] << " " < -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - - bool end = false; - while ((this->boundaryConditions.max() > 0 ) || !end) - { - if(this->boundaryConditions.max() == 0 ) - end=true; - else - end=false; -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - if(getSubgridValue(i) != INT_MAX) - { - VectorType tmp; - tmp.setSize(this->n * this->n); - //cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) <calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i); - insertSubgrid( tmp, 2); - 
this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 1)//) - /* &&(!(getBoundaryCondition(i) & 5) && !(getBoundaryCondition(i) & 10)) */) - { - //cout << "3 @ " << getBoundaryCondition(i) <device == tnlCudaDevice) - { - //cout << "fn" <n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cudaMalloc(&runcuda,sizeof(bool)); - //cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice); - //cout << "fn" <runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - //cudaDeviceSynchronize(); - //TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "fn" <<<n*this->n*sizeof(double)>>>(this->cudaSolver); - //cout << "a" <<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - - //cout << "a" <runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - //cout << "in kernel loop" << run_host <work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice); - //cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->work_u.getData()[0] <work_u.getSize()*sizeof(double)); - //cout << test[0] << test[1] << test[2] << test[3] <work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - 
//cout << this->tmpw << " " << test[0] << test[1] << test[2] << test[3] <u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <currentStep & 1) - { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i; - tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! 
(getBoundaryCondition(getOwner(tmp1)) & 1) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); - } - } - } - - } - else - { - for(int i = 1; i < this->gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1; - tmp2 = this->gridCols*this->n*j + i*this->n ; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! 
(getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } - } - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) < -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." 
<gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - - //this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ; - //this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ; - - cout << "Setting gridCols to " << this->gridCols << "." <gridRows << "." <subgridValues.setSize(this->gridCols*this->gridRows); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." 
<mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <unusedCell[i] = 1; - int diff =(this->n*this->gridCols) - idealStretch ; - //cout << "diff = " << diff <n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - //cout << i%(this->n*this->gridCols) - idealStretch +1 <n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - //cout << i/(this->n*this->gridCols) - idealStretch + 1 <n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - //cout << "i = " << i << " : i-k = " << i-k <n*this->gridCols)) - ( (this->mesh.getDimensions().x() - this->n)/(this->n - 1) + this->mesh.getDimensions().x() - 1) - + (this->n*this->gridCols - this->mesh.getDimensions().x())*(i/(this->n*this->n*this->gridCols)) ; - - if(j > 0) - k += j; - - int l = i-k - (this->u0.getSize() - 1); - int m = (l % this->mesh.getDimensions().x()); - - if(l>0) - k+= l + ( (l / this->mesh.getDimensions().x()) + 1 )*this->mesh.getDimensions().x() - (l % this->mesh.getDimensions().x());*/ - - this->work_u[i] = this->u0[i-k]; - //cout << (i-k) < -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." 
<n*this->n*this->gridCols*this->gridRows; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0)) - { - //cout << i <<" : " <u0[i-k] = this->work_u[i]; - } - - } - - cout << "Grid contracted" < -typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - VectorType u; - u.setSize(this->n*this->n); - - for( int j = 0; j < u.getSize(); j++) - { - u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - - for( int j = 0; j < this->n*this->n; j++) - { - int index = (i / this->gridCols)*this->n*this->n*this->gridCols - + (i % this->gridCols)*this->n - + (j/this->n)*this->n*this->gridCols - + (j % this->n); - //OMP LOCK index - if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j]; - this->unusedCell[index] = 0; - } - //OMP UNLOCK index - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID) -{ - - VectorType fu; 
- - fu.setLike(u); - fu.setValue( 0.0 ); - -/* - * Insert Euler-Solver Here - */ - - /**/ - - /*for(int i = 0; i < u.getSize(); i++) - { - int x = this->subMesh.getCellCoordinates(i).x(); - int y = this->subMesh.getCellCoordinates(i).y(); - - if(x == 0 && (boundaryCondition & 4) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = 0; y = 0" <(); - } - } - else if(x == 0 && (boundaryCondition & 4) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = 0; y = n" <(); - } - } - - - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = n; y = 0" <(); - } - } - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = n; y = n" <(); - } - } - - - else if(y == 0 && (boundaryCondition & 8) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = 0; x = 0" <(); - } - } - else if(y == 0 && (boundaryCondition & 8) && x == subMesh.getDimensions().x() - 1) - { - if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = 0; x = n" <(); - } - } - - - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) { - //cout << "y = n; x = 0" <(); - } - } - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x == subMesh.getDimensions().x() - 1) - { - 
if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = n; x = n" <(); - } - } - }*/ - - /**/ - - -/* bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - } - - - if(tmp) - {} - else if(boundaryCondition == 4) - { - int i; - for(i = 0; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 8) - { - int i; - for(i = 0; i < subMesh.getDimensions().x() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - - } - else if(boundaryCondition == 2) - { - int i; - for(i = subMesh.getDimensions().x() - 1; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 1) - { - int i; - for(i = (subMesh.getDimensions().y() - 1)*subMesh.getDimensions().x(); i < u.getSize() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = 
u[i]; - } -*/ - /**/ - - - - bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - int centerGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1); - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - } - //if(this->currentStep + 3 < getSubgridValue(subGridID)) - //tmp = true; - - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } - -/* - - else if(boundaryCondition == 5) - { - for(int i = 0; i < this->n - 1; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 10) - { - for(int i = 1; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 3) - { - for(int j = 0; j < this->n - 1; j++) - for(int i = 0;i < this->n - 1; i++) - 
//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 12) - { - for(int j = 1; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } -*/ - - - /**/ - - /*if (u.max() > 0.0) - this->stopTime *=(double) this->gridCols;*/ - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double maxResidue( 1.0 ); - //double lastResidue( 10000.0 ); - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0 >()*/) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { - Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x())); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities); - } - maxResidue = fu. absMax(); - - - if( this -> cflCondition * maxResidue != 0.0) - currentTau = this -> cflCondition / maxResidue; - - /* if (maxResidue < 0.05) - std::cout << "Max < 0.05" < 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) - { - //cout << currentTau << " >= " << 2.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >() <subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - /*if(maxResidue > lastResidue) - currentTau *=(1.0/10.0);*/ - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; -// for( int i = 0; i < fu.getSize(); i ++ ) -// { -// //cout << "Too big RHS! 
i = " << i << ", fu = " << fu[i] << ", u = " << u[i] <subgridValues[subGridID] == this->currentStep +4) ) - u[ i ] = add; - } - time += currentTau; - - //cout << '\r' << flush; - //cout << maxResidue << " " << currentTau << " @ " << time << flush; - //lastResidue = maxResidue; - } - //cout << "Time: " << time << ", Res: " << maxResidue < 0.0) - this->stopTime /=(double) this->gridCols;*/ - - VectorType solution; - solution.setLike(u); - for( int i = 0; i < u.getSize(); i ++ ) - { - solution[i]=u[i]; - } - return solution; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - //int j = threadIdx.x + threadIdx.y * blockDim.x; - int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - //printf("i= %d,j= %d,th= %d\n",i,j,th); - *a = caller->work_u_cuda[th]; - //printf("Hi %f \n", *a); - //return ret; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ -// int j = threadIdx.x + threadIdx.y * blockDim.x; - int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ 
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i ) -{ - - -// int j = threadIdx.x + threadIdx.y * blockDim.x; - //printf("j = %d, u = %f\n", j,u); - - int index = (blockIdx.y)*this->n*this->n*this->gridCols - + (blockIdx.x)*this->n - + threadIdx.y*this->n*this->gridCols - + threadIdx.x; - - //printf("i= %d,j= %d,index= %d\n",i,j,index); - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - //double tmpRes = 0.0; - volatile double* sharedTau = &u[blockDim.x*blockDim.y]; - volatile double* absVal = &u[2*blockDim.x*blockDim.y]; - int i = threadIdx.x; - int j = threadIdx.y; - int l = threadIdx.y * blockDim.x + threadIdx.x; - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1); - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - /*if(!tmp && (u[0]*u[l] <= 0.0)) - atomicMax( &tmp, 1);*/ - - __syncthreads(); - if(tmp !=1) - { -// if(computeFU) -// absVal[l]=0.0; -// else -// absVal[l] = fabs(u[l]); -// -// __syncthreads(); -// -// if((blockDim.x == 16) && (l < 128)) absVal[l] = Max(absVal[l],absVal[l+128]); -// __syncthreads(); -// if((blockDim.x == 16) && (l < 64)) absVal[l] = Max(absVal[l],absVal[l+64]); -// __syncthreads(); -// if(l 
< 32) absVal[l] = Max(absVal[l],absVal[l+32]); -// if(l < 16) absVal[l] = Max(absVal[l],absVal[l+16]); -// if(l < 8) absVal[l] = Max(absVal[l],absVal[l+8]); -// if(l < 4) absVal[l] = Max(absVal[l],absVal[l+4]); -// if(l < 2) absVal[l] = Max(absVal[l],absVal[l+2]); -// if(l < 1) value = sign(u[0])*Max(absVal[l],absVal[l+1]); -// __syncthreads(); -// -// if(computeFU) -// u[l] = value; - if(computeFU) - { - if(boundaryCondition == 4) - u[l] = u[threadIdx.y * blockDim.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n); - else if(boundaryCondition == 2) - u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); - else if(boundaryCondition == 8) - u[l] = u[threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n); - else if(boundaryCondition == 1) - u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; -// if(threadIdx.x * threadIdx.y == 0) -// { -// currentTau = finalTime; -// } - double finalTime = this->stopTime; - __syncthreads(); -// if( time + currentTau > finalTime ) currentTau = finalTime - time; - - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Entity.setCoordinates(Containers::StaticVector<2,int>(i,j)); - Entity.refresh(); - 
neighborEntities.refresh(subMesh,Entity.getIndex()); - - - while( time < finalTime ) - { - if(computeFU) - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighborEntities); - - sharedTau[l]=abs(cfl/fu); - - if(l == 0) - { - if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - else if(l == blockDim.x*blockDim.y - 1) - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - - -// if( (sign(u[l]+sharedTau[l]*fu) != sign(u[l])) && fu != 0.0 && fu != -0.0) -// { -// printf("orig: %10f", sharedTau[l]); -// sharedTau[l]=abs(u[l]/(1.1*fu)) ; -// printf(" new: %10f\n", sharedTau[l]); -// } - - - - if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - - u[l] += currentTau * fu; - time += currentTau; - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const -{ - - return ((i / (this->gridCols*this->n*this->n))*this->gridCols - + (i % (this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i 
) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8 - -template -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[4]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y == 0) - { - subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - newSubgridValue = 0; - //printf("%d %d\n", blockDim.x, gridDim.x); - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.x == 
blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x); - boundary_index = 0; - } - -// __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - } - __threadfence(); - __syncthreads(); - - 
if(threadIdx.x+threadIdx.y == 0) - { - if(subgridValue == INT_MAX && newSubgridValue !=0) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3]); - - - if(blockIdx.x+blockIdx.y ==0) - { - cudaSolver->currentStep = cudaSolver->currentStep + 1; - *(cudaSolver->runcuda) = 0; - } -// -// int stepValue = cudaSolver->currentStep + 4; -// if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) -// cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); -// -// atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); - } - - - /* - //printf("I am not an empty kernel!\n"); - //cout << "Synchronizig..." <currentStep & 1) - { - //printf("I am not an empty kernel! 1\n"); - for(int j = 0; j < cudaSolver->gridRows - 1; j++) - { - //printf("I am not an empty kernel! 3\n"); - for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++) - { - tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i; - tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! 
(cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1); - } - } - } - - } - else - { - //printf("I am not an empty kernel! 2\n"); - for(int i = 1; i < cudaSolver->gridCols; i++) - { - //printf("I am not an empty kernel! 
4\n"); - for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++) - { - - tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1; - tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2); - } - } - } - } - //printf("I am not an empty kernel! 
5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep); - - cudaSolver->currentStep = cudaSolver->currentStep + 1; - int stepValue = cudaSolver->currentStep + 4; - for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++) - { - if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(i, stepValue); - } - - int maxi = 0; - for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++) - { - //printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]); - maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q)); - } - //printf("I am not an empty kernel! %d\n", maxi); - *(cudaSolver->runcuda) = (maxi > 0); - //printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]); - //cout << "Grid synchronized at step " << (this->currentStep - 1 ) < -__global__ -void synchronize2CUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ -// if(blockIdx.x+blockIdx.y ==0) -// { -// cudaSolver->currentStep = cudaSolver->currentStep + 1; -// *(cudaSolver->runcuda) = 0; -// } - - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3) -{ - //cout << "Initializating solver..." 
<("mesh"); - //this->mesh_cuda.load( meshLocation ); - - //this->n_cuda = parameters.getParameter ("subgrid-size"); - //cout << "Setting N << this->n_cuda <subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda ); - //this->subMesh_cuda.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - //Containers::StaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) ); - - //this->subMesh_cuda.save("submesh.tnl"); - -// const String& initialCondition = parameters.getParameter ("initial-condition"); -// this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <delta_cuda = parameters.getParameter ("delta"); - //this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >(); - - //cout << "Setting delta to " << this->delta <tau0_cuda = parameters.getParameter ("initial-tau"); - //cout << "Setting initial tau to " << this->tau0_cuda <stopTime_cuda = parameters.getParameter ("stop-time"); - - //this->cflCondition_cuda = parameters.getParameter ("cfl-condition"); - //this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()); - //cout << "Setting CFL to " << this->cflCondition <gridRows_cuda = gridRows; -// this->gridCols_cuda = gridCols; - - cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); - cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->runcuda = 
ptr2;//(bool*)malloc(sizeof(bool)); - *(cudaSolver->runcuda) = 1; - cudaSolver->currentStep = 1; - //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); - //ptr = cudaSolver->work_u_cuda; - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) - { - printf("%d\n",j); - cudaSolver->unusedCell_cuda[ j] = 1; - }*/ - printf("GPU memory initialized.\n"); - - - //cudaSolver->work_u_cuda[50] = 32.153438; -//// -//// - //stretchGrid(); - //this->stopTime_cuda /= (double)(this->gridCols_cuda); - //this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0)); - //cout << "Setting stopping time to " << this->stopTime <stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter ("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <schemeDevice.init(parameters)) -// { - //cerr << "Scheme failed to initialize." 
<currentStep_cuda = 1; - //return true; -} - - - - -//extern __shared__ double array[]; -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - - - extern __shared__ double u[]; - //printf("%p\n",caller->work_u_cuda); - - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - containsCurve = 0; - - //double a; - caller->getSubgridCUDA2D(i,caller, &u[l]); - //printf("%f %f\n",a , u[l]); - //u[l] = a; - //printf("Hi %f \n", u[l]); - __syncthreads(); - //printf("hurewrwr %f \n", u[l]); - if(u[0] * u[l] <= 0.0) - { - //printf("contains %d \n",i); - atomicMax( &containsCurve, 1); - } - - __syncthreads(); - //printf("hu"); - //printf("%d : %f\n", l, u[l]); - if(containsCurve == 1) - { - //printf("have curve \n"); - caller->runSubgridCUDA2D(0,u,i); - //printf("%d : %f\n", l, u[l]); - __syncthreads(); - caller->insertSubgridCUDA2D(u[l],i); - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA2D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA2D(i); - - if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0) - { - caller->getSubgridCUDA2D(i,caller, &u[l]); - - //if(l == 0) - //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i)); - if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) 
- { - if(bound & 1) - { - caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2 ) - { - caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - if( ((bound & 3 ))) - { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 5 ))) - { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 10 ))) - { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 12 )) - { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - 
//caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - } - - - else - { - - - - - - - - - - if( ((bound == 2))) - { - caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 1) )) - { - caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 8) )) - { - caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound == 4)) - { - caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - - - - - - if( ((bound & 3) )) - { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 5) )) - { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 10) )) - { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - 
__syncthreads(); - } - if( (bound & 12) ) - { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - - - - - - - - } - /*if( bound ) - { - caller->runSubgridCUDA2D(15,u,i); - __syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - }*/ - - if(l==0) - { - caller->setBoundaryConditionCUDA2D(i, 0); - caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELEIKONALSOLVER2D_IMPL_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h deleted file mode 100644 index dc3fd5467..000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +++ /dev/null @@ -1,1706 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver2D_impl.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER3D_IMPL_H_ -#define TNLPARALLELEIKONALSOLVER3D_IMPL_H_ - - -#include "tnlParallelEikonalSolver.h" -#include - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() -{ - cout << "a" <device = TNL::Devices::HostDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - - cout << "b" < -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." 
<("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter ("subgrid-size"); - cout << "Setting N to " << this->n <subMesh.setDimensions( this->n, this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<3,double>(0.0, 0.0, 0.0), - Containers::StaticVector<3,double>(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1, 0 >()*(double)(this->n),mesh.template getSpaceStepsProducts< 0, 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter ("initial-condition"); - this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <delta = parameters.getParameter ("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >(); - - cout << "Setting delta to " << this->delta <tau0 = parameters.getParameter ("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <stopTime = parameters.getParameter ("stop-time"); - - this->cflCondition = parameters.getParameter ("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >()); - cout << "Setting CFL to " << this->cflCondition <stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <stopTime = 1.5*((double)(this->n))*parameters.getParameter ("stop-time")*mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <device == tnlCudaDevice) - { - /*cout << "Testing... 
" <device == tnlCudaDevice) - { - if( !initCUDA3D(parameters, gridRows, gridCols) ) - return false; - }*/ - //cout << "s" <cudaSolver), sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >)); - //cout << "s" <cudaSolver, this,sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - //cout << "s" <tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA3D<<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s " <work_u_cuda), this->work_u.getSize()*sizeof(double)); - double* tmpu = NULL; - - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - //printf("%p %p \n",tmpu,tmpw); - cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s "<device == TNL::Devices::HostDevice) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - bool containsCurve = false; -// cout << "Working on subgrid " << i <<" --- check 1" <n*this->n*this->n)) - cout << "Could not allocate tmp["<< i <<"] array." 
<device == tnlCudaDevice) - { -// cout << "pre 1 kernel" <n, this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA3D<<n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); -// cout << "post 1 kernel" <currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] <<" " << test[1] <<" " << test[2] <<" " << test[3] <work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] <<" " << test[1] << " " <<<>>(this->cudaSolver); - cout << cudaGetErrorString(cudaDeviceSynchronize()) <<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << test[0] << " " <work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //TNL_CHECK_CUDA_DEVICE; - //cout << this->tmpw << " " << test[0] << " " < -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - - bool end = false; - while (/*(this->boundaryConditions.max() > 0 ) ||*/ !end) - { - if(this->boundaryConditions.max() == 0 || this->subgridValues.max() < 0) - end=true; - else - end=false; -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - VectorType tmp; - tmp.setSize(this->n*this->n*this->n); - if(getSubgridValue(i) != INT_MAX) - { - //cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) <calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = 
getSubgrid(i); - tmp = runSubgrid(2, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 16) - { - tmp = getSubgrid(i); - tmp = runSubgrid(16, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 32) - { - tmp = getSubgrid(i); - tmp = runSubgrid(32, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if( getBoundaryCondition(i) & 19) - { - tmp = getSubgrid(i); - tmp = runSubgrid(19, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 21) - { - tmp = getSubgrid(i); - tmp = runSubgrid(21, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 26) - { - tmp = getSubgrid(i); - tmp = runSubgrid(26, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 28) - { - tmp = getSubgrid(i); - tmp = runSubgrid(28, tmp ,i); - insertSubgrid( tmp, i); - } - - if( getBoundaryCondition(i) & 35) - { - tmp = getSubgrid(i); - tmp = runSubgrid(35, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 37) - { - tmp = getSubgrid(i); - tmp = runSubgrid(37, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 42) - { - tmp = getSubgrid(i); - tmp = runSubgrid(42, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 44) - { - tmp = getSubgrid(i); - tmp = runSubgrid(44, tmp ,i); - insertSubgrid( tmp, i); - } - - - setBoundaryCondition(i, 0); - setSubgridValue(i, getSubgridValue(i)-1); - - } - } - synchronize(); - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - //cout << "fn" <n, this->n, this->n); - dim3 
numBlocks(this->gridCols,this->gridRows,this->gridLevels); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cudaMalloc(&runcuda,sizeof(bool)); - //cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice); - //cout << "fn" <runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - //cudaDeviceSynchronize(); - //TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "fn" <<<n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - //cout << "a" <<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA3D<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - - //cout << "a" <runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - //cout << "in kernel loop" << run_host <work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice); - //cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->work_u.getData()[0] <work_u.getSize()*sizeof(double)); - //cout << test[0] << test[1] << test[2] << test[3] <work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] << test[1] << test[2] << test[3] <u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, 
int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <currentStep & 1) -// { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - for (int k = 0; k < this->gridLevels*this->n; k++) - { -// cout << "a" <gridCols*this->n*((this->n-1)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n; -// cout << "b" <gridCols*this->n*((this->n)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n; -// cout << "c" < work_u.getSize()) - cout << "tmp1: " << tmp1 << " x: " << j <<" y: " << i <<" z: " << k < work_u.getSize()) - cout << "tmp2: " << tmp2 << " x: " << j <<" y: " << i <<" z: " << k <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; -// cout << "f" <unusedCell[tmp2] = 0; -// cout << "g" <work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; -// cout << "j" <unusedCell[tmp1] = 0; -// cout << "k" <gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - for (int k = 0; k < this->gridLevels*this->n; k++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1 + k*this->gridCols*this->n*this->gridRows*this->n; - tmp2 = this->gridCols*this->n*j + i*this->n + k*this->gridCols*this->n*this->gridRows*this->n; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j, k" << i << "," << j << "," << k <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - 
setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } - } - - cout << "sync 3" <gridLevels; k++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*j + i + (k*this->n-1)*this->gridCols*this->n*this->gridRows*this->n; - tmp2 = this->gridCols*this->n*j + i + k*this->n*this->gridCols*this->n*this->gridRows*this->n; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j, k" << i << "," << j << "," << k <work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 32) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+32); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! 
(getBoundaryCondition(getOwner(tmp1)) & 16) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+16); - } - } - } - } -// } - - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) < -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - int j = i % (this->gridCols*this->gridRows*this->n*this->n); - - return ( (i / (this->gridCols*this->gridRows*this->n*this->n*this->n))*this->gridCols*this->gridRows - + (j / (this->gridCols*this->n*this->n))*this->gridCols - + (j % (this->gridCols*this->n))/this->n); -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." 
<gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - this->gridLevels = ceil( ((double)(this->mesh.getDimensions().z()-1)) / ((double)(this->n-1)) ); - - //this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ; - //this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ; - - cout << "Setting gridCols to " << this->gridCols << "." <gridRows << "." <gridLevels << "." <subgridValues.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int levelSize = this->n*this->n*this->gridCols*this->gridRows; - int stretchedSize = this->n*levelSize*this->gridLevels; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." 
<mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <n*this->gridCols) - idealStretch ; - - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - k+= i%(this->n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - for( int j = 0; jn*this->gridLevels; j++) - { - this->unusedCell[i+j*levelSize] = 1; - int l = j/this->n; - - if(j - idealStretch >= 0) - { - l+= j - idealStretch + 1; - } - - this->work_u[i+j*levelSize] = this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k]; - } - - } - - - - cout << "Grid stretched." < -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." 
<n*this->n*this->gridCols*this->gridRows; - int stretchedSize = this->n*levelSize*this->gridLevels; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0) ) - { - for( int j = 0; jn*this->gridLevels; j++) - { - int l = j/this->n; - if(j - idealStretch < 0) - this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+j*levelSize]; - } - } - - } - - cout << "Grid contracted" < -typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - - VectorType u; - u.setSize(this->n*this->n*this->n); - - int idx, idy, idz; - idz = i / (gridRows*this->gridCols); - idy = (i % (this->gridRows*this->gridCols)) / this->gridCols; - idx = i % (this->gridCols); - - for( int j = 0; j < this->n; j++) - { - // int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - for( int k = 0; k < this->n; k++) - { - for( int l = 0; l < this->n; l++) - { - int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows - + (idy) * this->n*this->n*this->gridCols - + (idx) * this->n - + k * this->n*this->gridCols - + j; - - u[j + k*this->n + l*this->n*this->n] = this->work_u[ index ]; - } - } - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - int idx, idy, idz; - idz = i / 
(this->gridRows*this->gridCols); - idy = (i % (this->gridRows*this->gridCols)) / this->gridCols; - idx = i % (this->gridCols); - - for( int j = 0; j < this->n; j++) - { - // int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - for( int k = 0; k < this->n; k++) - { - for( int l = 0; l < this->n; l++) - { - - int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows - + (idy) * this->n*this->n*this->gridCols - + (idx) * this->n - + k * this->n*this->gridCols - + j; - - //OMP LOCK index -// cout<< idx << " " << idy << " " << idz << " " << j << " " << k << " " << l << " " << idz << " " << unusedCell.getSize() << " " << u.getSize() << " " << index <work_u[index]) > fabs(u[j + k*this->n + l*this->n*this->n])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j + k*this->n + l*this->n*this->n]; - this->unusedCell[index] = 0; - } - //OMP UNLOCK index - } - } - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID) -{ - - VectorType fu; - - fu.setLike(u); - fu.setValue( 0.0 ); - - - bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - } - int idx,idy,idz; - idz = subGridID / (this->gridRows*this->gridCols); - idy = (subGridID % (this->gridRows*this->gridCols)) / this->gridCols; - idx = subGridID % (this->gridCols); - int centerGID = (this->n*idy + (this->n>>1) )*(this->n*this->gridCols) + this->n*idx + (this->n>>1) - + ((this->n>>1)+this->n*idz)*this->n*this->n*this->gridRows*this->gridCols; - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - //if(this->currentStep + 3 < getSubgridValue(subGridID)) - //tmp = true; 
- - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[k*this->n*this->n + i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[k*this->n*this->n + i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j]; - } - else if(boundaryCondition == 16) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n ; i++) - for(int k = 0;k < this->n-1; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 32) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n; i++) - for(int k = 1;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j]; - } - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if(boundaryCondition == 0) finalTime *= 2.0; - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double 
maxResidue( 1.0 ); - //double lastResidue( 10000.0 ); - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { -// std::cout << "i: " << i << ", time: " << time < coords(i % subMesh.getDimensions().x(), - (i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) / subMesh.getDimensions().x(), - i / (subMesh.getDimensions().x()*subMesh.getDimensions().y())); -// cout << "b " << i << " " << i % subMesh.getDimensions().x() << " " << (i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) << " " << (i % subMesh.getDimensions().x()*subMesh.getDimensions().y()) / subMesh.getDimensions().x() << " " << subMesh.getDimensions().x()*subMesh.getDimensions().y() << " " <subMesh, i, coords,u, time, boundaryCondition, neighborEntities ); -// std::cout << "f" < cflCondition * maxResidue != 0.0) - currentTau = this -> cflCondition / maxResidue; - - /* if (maxResidue < 0.05) - std::cout << "Max < 0.05" < 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()) - currentTau = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - /*if(maxResidue > lastResidue) - currentTau *=(1.0/10.0);*/ - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; -// for( int i = 0; i < fu.getSize(); i ++ ) -// { -// //cout << "Too big RHS! 
i = " << i << ", fu = " << fu[i] << ", u = " << u[i] <subgridValues[subGridID] == this->currentStep +4) ) - u[ i ] = add; - } - time += currentTau; - - //cout << '\r' << flush; - //cout << maxResidue << " " << currentTau << " @ " << time << flush; - //lastResidue = maxResidue; - } - //cout << "Time: " << time << ", Res: " << maxResidue < 0.0) - this->stopTime /=(double) this->gridCols;*/ - -// VectorType solution; -// solution.setLike(u); -// for( int i = 0; i < u.getSize(); i ++ ) -// { -// solution[i]=u[i]; -// } -// return solution; - return u; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - //int j = threadIdx.x + threadIdx.y * blockDim.x; -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - //printf("i= %d,j= %d,th= %d\n",i,j,th); - *a = caller->work_u_cuda[index]; - //printf("Hi %f \n", *a); - //return ret; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ -// int j = threadIdx.x + threadIdx.y * blockDim.x; -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * 
this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA3D( double u, const int i ) -{ - - -// int j = threadIdx.x + threadIdx.y * blockDim.x; - //printf("j = %d, u = %f\n", j,u); - -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - //printf("i= %d,j= %d,index= %d\n",i,j,index); - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - //double tmpRes = 0.0; - volatile double* sharedTau = &u[blockDim.x*blockDim.y*blockDim.z]; -// volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; - int i = threadIdx.x; 
- int j = threadIdx.y; - int k = threadIdx.z; - int l = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z*blockDim.x*blockDim.y; - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))or - (k == 0 && (boundaryCondition & 32)) or - (k == blockDim.z - 1 && (boundaryCondition & 16))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1) )*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1) - + ((blockDim.z>>1)+blockDim.z*blockIdx.z)*blockDim.x*blockDim.y*gridDim.x*gridDim.y; - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - - __syncthreads(); - if(tmp !=1) - { -// if(computeFU) -// absVal[l]=0.0; -// else -// absVal[l] = fabs(u[l]); -// -// __syncthreads(); -// -// if((blockDim.x == 16) && (l < 128)) absVal[l] = Max(absVal[l],absVal[l+128]); -// __syncthreads(); -// if((blockDim.x == 16) && (l < 64)) absVal[l] = Max(absVal[l],absVal[l+64]); -// __syncthreads(); -// if(l < 32) absVal[l] = Max(absVal[l],absVal[l+32]); -// if(l < 16) absVal[l] = Max(absVal[l],absVal[l+16]); -// if(l < 8) absVal[l] = Max(absVal[l],absVal[l+8]); -// if(l < 4) absVal[l] = Max(absVal[l],absVal[l+4]); -// if(l < 2) absVal[l] = Max(absVal[l],absVal[l+2]); -// if(l < 1) value = sign(u[0])*Max(absVal[l],absVal[l+1]); -// __syncthreads(); -// -// if(computeFU) -// u[l] = value; - if(computeFU) - { - tnlGridEntity Ent(subMesh); - if(boundaryCondition == 4) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(0,j,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x+this->n); - } - else if(boundaryCondition == 2) - { - 
Ent.setCoordinates(Containers::StaticVector<3,int>(blockDim.x - 1,j,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); - } - else if(boundaryCondition == 8) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,0,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y+this->n); - } - else if(boundaryCondition == 1) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,blockDim.y - 1,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); - } - else if(boundaryCondition == 32) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,0)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(threadIdx.z); - } - else if(boundaryCondition == 16) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,blockDim.z - 1)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(this->n - 1 - threadIdx.z) ; - } - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; -// if(threadIdx.x * threadIdx.y * threadIdx.z == 0) -// { -// currentTau = this->tau0; -// } - double finalTime = this->stopTime; - __syncthreads(); - if( boundaryCondition == 0 ) finalTime *= 2.0; - - tnlGridEntity Entity(subMesh); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - 
Entity.setCoordinates(Containers::StaticVector<3,int>(i,j,k)); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - - - while( time < finalTime ) - { - sharedTau[l]=finalTime; - - if(computeFU) - { - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighborEntities); - if(abs(fu) > 0.0) - sharedTau[l]=abs(cfl/fu); - } - - if(l == 0) - { - if(sharedTau[0] > 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()) sharedTau[0] = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - else if(l == blockDim.x*blockDim.y*blockDim.z - 1) - { - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - } - - __syncthreads(); - if(l < 256) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]); - __syncthreads(); - if(l < 128) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if(l < 64) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - __syncthreads(); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - -// if(abs(fu) < 10000.0) -// printf("bla"); - if(computeFU) - u[l] += currentTau * fu; - time += currentTau; - __syncthreads(); - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA3D(int i) const -{ - int j = i % (this->gridCols*this->gridRows*this->n*this->n); - - return ( (i / (this->gridCols*this->gridRows*this->n*this->n))*this->gridCols*this->gridRows - + (j / (this->gridCols*this->n*this->n))*this->gridCols - + (j % 
(this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA3D( int i ) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA3D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA3D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA3D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8, up -16, down - 32 - -template -__global__ -void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[6]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y+threadIdx.z == 0) - { - subgridValue = 
cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - boundary[4] = 0; - boundary[5] = 0; - newSubgridValue = 0; -// printf("aaa z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.z == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.z == blockDim.z - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y - 1)*gridDim.x + blockIdx.x + 
blockIdx.z*gridDim.x*gridDim.y); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y + 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); - boundary_index = 0; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - - if(threadIdx.z == 0 && (blockIdx.z != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z - 1)*gridDim.x*gridDim.y); - boundary_index = 5; - } - if(threadIdx.z == blockDim.z - 1 && (blockIdx.z != gridDim.z - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z + 1)*gridDim.x*gridDim.y); - boundary_index = 4; - } - __threadfence(); - - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - __threadfence(); - - } - __syncthreads(); - - if(threadIdx.x+threadIdx.y+threadIdx.z == 0) - { - - if(subgridValue == INT_MAX && newSubgridValue != 0) - cudaSolver->setSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + 
blockIdx.z*gridDim.x*gridDim.y, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y, 1 * boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3] + - 16 * boundary[4] + - 32 * boundary[5] ); - if(blockIdx.x+blockIdx.y+blockIdx.z == 0) - { - cudaSolver->currentStep = cudaSolver->currentStep + 1; - *(cudaSolver->runcuda) = 0; - } - } -} - - - -template -__global__ -void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void initCUDA3D( tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3) -{ - - - cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); - cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int)); - cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int)); - cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool)); - *(cudaSolver->runcuda) = 1; - cudaSolver->currentStep = 1; - //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); - //ptr = 
cudaSolver->work_u_cuda; - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) - { - printf("%d\n",j); - cudaSolver->unusedCell_cuda[ j] = 1; - }*/ - printf("GPU memory initialized.\n"); -} - - - - -//extern __shared__ double array[]; -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - - - extern __shared__ double u[]; - - int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - { -// printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); - containsCurve = 0; - } - - caller->getSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - if(u[0] * u[l] <= 0.0) - { - atomicMax( &containsCurve, 1); - } - - __syncthreads(); - if(containsCurve == 1) - { - caller->runSubgridCUDA3D(0,u,i); - __syncthreads(); -// caller->insertSubgridCUDA3D(u[l],i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA3D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA3D(i); - - if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound 
!= 0 && caller->getSubgridValueCUDA3D(i) > 0) - { - caller->getSubgridCUDA3D(i,caller, &u[l]); - - //if(l == 0) - //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA3D(i)); - if(caller->getSubgridValueCUDA3D(i) == caller->currentStep+4) - { - if(bound & 1) - { - caller->runSubgridCUDA3D(1,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2 ) - { - caller->runSubgridCUDA3D(2,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA3D(4,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA3D(8,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 16) - { - caller->runSubgridCUDA3D(16,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 32) - { - caller->runSubgridCUDA3D(32,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - } - else - { - if( ((bound == 2))) - { - caller->runSubgridCUDA3D(2,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 1) )) - { - caller->runSubgridCUDA3D(1,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 8) )) - { - caller->runSubgridCUDA3D(8,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if((bound == 4)) - { - caller->runSubgridCUDA3D(4,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 16) - { - caller->runSubgridCUDA3D(16,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 32) - { - caller->runSubgridCUDA3D(32,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - } - /* 1 2 4 8 16 32 */ - - if( ((bound & 19 ))) /* 1 1 0 0 1 0 */ - { - caller->runSubgridCUDA3D(19,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( 
((bound & 21 ))) /* 1 0 1 0 1 0 */ - { - caller->runSubgridCUDA3D(21,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 26 ))) /* 0 1 0 1 1 0 */ - { - caller->runSubgridCUDA3D(26,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 28 )) /* 0 0 1 1 1 0 */ - { - caller->runSubgridCUDA3D(28,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - - - if( ((bound & 35 ))) /* 1 0 1 0 0 1 */ - { - caller->runSubgridCUDA3D(35,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 37 ))) /* 1 0 1 0 0 1 */ - { - caller->runSubgridCUDA3D(37,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 42 ))) /* 0 1 0 1 0 1 */ - { - caller->runSubgridCUDA3D(42,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 44 )) /* 0 0 1 1 0 1 */ - { - caller->runSubgridCUDA3D(44,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - if(l==0) - { - caller->setBoundaryConditionCUDA3D(i, 0); - caller->setSubgridValueCUDA3D(i, caller->getSubgridValueCUDA3D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELEIKONALSOLVER3D_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/CMakeLists.txt b/src/TNL/Legacy/narrow-band/CMakeLists.txt deleted file mode 100644 index 158cd2013..000000000 --- a/src/TNL/Legacy/narrow-band/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_narrow_band_SOURCES -# MainBuildConfig.h -# tnlNarrowBand2D_impl.h -# tnlNarrowBand.h -# narrowBandConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(narrow-band main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(narrow-band main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (narrow-band tnl ) - - -INSTALL( TARGETS narrow-band - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE 
) - -#INSTALL( FILES ${tnl_narrow_band_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/narrow-band ) diff --git a/src/TNL/Legacy/narrow-band/MainBuildConfig.h b/src/TNL/Legacy/narrow-band/MainBuildConfig.h deleted file mode 100644 index ed3d686eb..000000000 --- a/src/TNL/Legacy/narrow-band/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" < struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. 
- */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/narrow-band/main.cpp b/src/TNL/Legacy/narrow-band/main.cpp deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/narrow-band/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/narrow-band/main.cu b/src/TNL/Legacy/narrow-band/main.cu deleted file mode 100644 index 8849008ff..000000000 --- a/src/TNL/Legacy/narrow-band/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/narrow-band/main.h b/src/TNL/Legacy/narrow-band/main.h deleted file mode 100644 index 51dbdac37..000000000 --- a/src/TNL/Legacy/narrow-band/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -//#include "tnlNarrowBand.h" - //for DEVICE versions: -#include "tnlNarrowBand_CUDA.h" -#include "narrowBandConfig.h" -#include - -#include -#include -#include -#include - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - narrowBandConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlNarrowBand, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <, double, int> solver; -// if(!solver.init(parameters)) -// { -// cerr << "Solver failed to initialize." < - -template< typename ConfigTag > -class narrowBandConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Narrow Band Solver solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addRequiredEntry < double > ( "tau", "Time step."); - config.addRequiredEntry < double > ( "final-time", "Final time."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? 
(yes/no)", "no" ); - } -}; - -#endif /* NARROWBANDCONFIG_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand.h deleted file mode 100644 index 7d3d19bc0..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h +++ /dev/null @@ -1,186 +0,0 @@ -/*************************************************************************** - tnlNarrowBand.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND_H_ -#define TNLNARROWBAND_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_OPENMP -#include -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlNarrowBand -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void 
updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const 
Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlNarrowBand2D_impl.h" - //for parallel version use this one instead: -// #include "tnlNarrowBand2D_openMP_impl.h" - -#include "tnlNarrowBand3D_impl.h" - -#endif /* TNLNARROWBAND_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h deleted file mode 100644 index dff0b48c8..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1317 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#define NARROWBAND_SUBGRID_SIZE 32 - -#include "tnlNarrowBand.h" - -#ifdef HAVE_CUDA -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} -#endif - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const -{ - if(arg > 0.0) - return arg; - return 0.0; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const -{ - if(arg < 0.0) - return -arg; - return 0.0; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:dofVector(Mesh) -{ 
-} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - tau = parameters.getParameter< double >( "tau" ); - - finalTime = parameters.getParameter< double >( "final-time" ); - - statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaStatusVector), statusGridSize*statusGridSize*sizeof(int)); -// cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice); - - cudaMalloc(&reinitialize, sizeof(int)); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, 
Index >), cudaMemcpyHostToDevice); - - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#endif - - int n = Mesh.getDimensions().x(); - - dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocks2(statusGridSize ,statusGridSize); - initSetupGridCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - /*dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1);*/ - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - cout << "Solver initialized." < -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlockFS(1, 512); - dim3 numBlocksFS(4,1); - dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1); - - double time = 0.0; - int reinit = 0; - - cout << "Hi!" <>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cout << "Hi2!" 
< finalTime) - tau=finalTime-time; - - runNarrowBandCUDA<<>>(this->cudaSolver,tau); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - time += tau; - - - cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - if(reinit != 0 /*&& time != finalTime */) - { - cout << time <>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - runCUDA<<>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - } - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. 
- int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize; - if(cudaStatusVector[subgridID] != 0 && i, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9))*/ ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17))*/ ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 /*|| (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3))*/ ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5)) */) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - } - -} - - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + 
threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - if(abs(cudaDofVector2[gid]) > 1.5*h) - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]); - -// if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) -// { -// if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h; -// } -// if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; -// } -// -// if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h; -// } -// if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid-Mesh.getDimensions().x()] = sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; -// } -// } - - -// - - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// 
if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 2,MeshReal, 
Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - -} - - - - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) 
-{ - __shared__ double u0; - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - if(threadIdx.x+threadIdx.y == 0) - { -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - if(blockIdx.x+blockIdx.y == 0) - *(solver->reinitialize) = 0; - - solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0; - - u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0]; - } - __syncthreads(); - - double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x]; - - if(u*u0 <=0.0) - atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1); - } -// if(threadIdx.x+threadIdx.y == 0) - -// printf("Bye from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - -} - - - -// run this with one thread per block -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ -// printf("Hello\n"); - if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1) - { -// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// 8 - to the east of curve, 16 - to the west of curve. 
- if(blockIdx.x > 0) - { - atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); - } - - if(blockIdx.x < gridDim.x - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); - - if(blockIdx.y > 0 ) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4); - - if(blockIdx.y < gridDim.y - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2); - } - - -} - - - - - -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau) -{ - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x; - int i = threadIdx.x + blockIdx.x*blockDim.x; - int j = threadIdx.y + blockIdx.y*blockDim.y; - -// if(i+j == 0) -// printf("Hello\n"); - - int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/ - - int status = solver->cudaStatusVector[blockID]; - - if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j) - { - - if(status != 0) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh); - Entity.setCoordinates(Containers::StaticVector<2,double>(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - double value = solver->cudaDofVector2[Entity.getIndex()]; - double xf,xb,yf,yb, grad, fu, a,b; - a = b = 0.0; - - if( i == 0 || (threadIdx.x == 0 && !(status & 9)) ) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) ) - { - xb = 
value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()] - value; - } - else - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - - if( j == 0 || (threadIdx.y == 0 && !(status & 3)) ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - else if( j == solver->Mesh.getDimensions().y() - 1 || (threadIdx.y == blockDim.y - 1 && !(status & 5)) ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()] - value; - } - else - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - __syncthreads(); - - - - - - if(sign(value) >= 0.0) - { - xf = solver->negativePart(xf); - - xb = solver->positivePart(xb); - - yf = solver->negativePart(yf); - - yb = solver->positivePart(yb); - - } - else - { - - xb = solver->negativePart(xb); - - xf = solver->positivePart(xf); - - yb = solver->negativePart(yb); - - yf = solver->positivePart(yf); - } - - - if(xb > xf) - a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - else - a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - if(yb > yf) - b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - else - b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - - - -// grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - grad = sqrt(/*0.5 **/ (a*a + b*b ) ); - - fu = -1.0 * grad; - - 
if((tau*fu+value)*value <=0 ) - { - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. - - if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) - atomicMax(solver->reinitialize,1); - } - - solver->cudaDofVector2[Entity.getIndex()] += tau*fu; - } - } -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: 
setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename 
MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, 
a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - 
cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - 
cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 
2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template 
getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template 
getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - 
(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - 
cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h deleted file mode 100644 index c92810490..000000000 --- 
a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h +++ /dev/null @@ -1,1313 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#define NARROWBAND_SUBGRID_SIZE 32 - -#include "tnlNarrowBand.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const -{ - if(arg > 0.0) - return arg; - return 0.0; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< 
tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const -{ - if(arg < 0.0) - return -arg; - return 0.0; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - tau = parameters.getParameter< double >( "tau" ); - - finalTime = parameters.getParameter< double >( "final-time" ); - - statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaStatusVector), statusGridSize*statusGridSize*sizeof(int)); -// cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice); - - cudaMalloc(&reinitialize, sizeof(int)); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#endif - - int n = Mesh.getDimensions().x(); - - dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocks2(statusGridSize ,statusGridSize); - initSetupGridCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - /*dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1);*/ - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - 
TNL_CHECK_CUDA_DEVICE; - - - cout << "Solver initialized." < -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlockFS(1, 512); - dim3 numBlocksFS(4,1); - dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1); - - double time = 0.0; - int reinit = 0; - - cout << "Hi!" <>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cout << "Hi2!" < finalTime) - tau=finalTime-time; - - runNarrowBandCUDA<<>>(this->cudaSolver,tau); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - time += tau; - - - cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - if(reinit != 0 /*&& time != finalTime */) - { - cout << time <>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - runCUDA<<>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - } - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, 
Index > :: updateValue( Index i, Index j) -{ - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. - int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); - if(/*cudaStatusVector[subgridID] != 0 &&*/ i, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9)) */) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17)) */) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0/* || (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3)) */) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5))*/ ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - 
atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - } - -} - - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]); - - if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) - { - if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h; - } - if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; - } - - if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h; - } - if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid-Mesh.getDimensions().x()] = 
sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; - } - } - - -// - - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// 
setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - 
solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - -} - - - - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - __shared__ double u0; - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - if(threadIdx.x+threadIdx.y == 0) - { -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - if(blockIdx.x+blockIdx.y == 0) - *(solver->reinitialize) = 0; - - solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0; - - u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0]; - } - __syncthreads(); - - double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x]; - - if(u*u0 <=0.0) - atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1); - } -// if(threadIdx.x+threadIdx.y == 0) - -// printf("Bye from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - -} - - - -// run this with one thread per block -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ -// printf("Hello\n"); - 
if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1) - { -// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// 8 - to the east of curve, 16 - to the west of curve. - if(blockIdx.x > 0) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); - - if(blockIdx.x < gridDim.x - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); - - if(blockIdx.y > 0 ) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4); - - if(blockIdx.y < gridDim.y - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2); - } - - -} - - - - - -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau) -{ - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x; - int i = threadIdx.x + blockIdx.x*blockDim.x; - int j = threadIdx.y + blockIdx.y*blockDim.y; - -// if(i+j == 0) -// printf("Hello\n"); - - int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/ - - int status = solver->cudaStatusVector[blockID]; - - if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j) - { - -// if(status != 0) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh); - Entity.setCoordinates(Containers::StaticVector<2,double>(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - double value = solver->cudaDofVector2[Entity.getIndex()]; - double xf,xb,yf,yb, grad, fu, a,b; - a = b = 0.0; - - if( i == 0 /*|| (threadIdx.x == 0 && !(status & 9)) */) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - xf = 
solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - else if( i == solver->Mesh.getDimensions().x() - 1 /*|| (threadIdx.x == blockDim.x - 1 && !(status & 17)) */) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()] - value; - } - else - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - - if( j == 0/* || (threadIdx.y == 0 && !(status & 3))*/ ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - else if( j == solver->Mesh.getDimensions().y() - 1 /*|| (threadIdx.y == blockDim.y - 1 && !(status & 5)) */) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()] - value; - } - else - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - __syncthreads(); - - - - - - if(sign(value) > 0.0) - { - xf = solver->negativePart(xf); - - xb = solver->positivePart(xb); - - yf = solver->negativePart(yf); - - yb = solver->positivePart(yb); - - } - else - { - - xb = solver->negativePart(xb); - - xf = solver->positivePart(xf); - - yb = solver->negativePart(yb); - - yf = solver->positivePart(yf); - } - - - if(xb > xf) - a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - else - a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - if(yb > yf) - b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - else - b = yf*solver->Mesh.template getSpaceStepsProducts< 0, 
-1 >(); - - - -// grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - grad = sqrt(/*0.5 **/ (a*a + b*b ) ); - - fu = -1.0 * grad; - -// if((tau*fu+value)*value <=0 ) -// { -// // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// // 8 - to the east of curve, 16 - to the west of curve. -// -// if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) -// atomicMax(solver->reinitialize,1); -// } - - solver->cudaDofVector2[Entity.getIndex()] += tau*fu; - } - } -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template 
getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; 
- b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 
1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, 
TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - 
(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void 
tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - 
be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > 
Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - 
cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff 
--git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h deleted file mode 100644 index d42bc2a76..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h +++ /dev/null @@ -1,927 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#include "tnlNarrowBand.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! 
Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" < -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) - { - for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) - { - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - neighborEntities.refresh(Mesh,Entity.getIndex()); - - if(dofVector[this->Entity.getIndex()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template 
getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - } - cout << "a" < 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// i = 0; -// j = Mesh.getDimensions().y() -1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - //data.setLike(dofVector2.getData()); - //data=dofVector2.getData(); - //cout << data.getType() < -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() 
-{ - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - -// data.setLike(dofVector2.getData()); -// data = dofVector2.getData(); -// cout << data.getType() < -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 
0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - -// if(dofVector2[Entity.getIndex()] > 1.0) -// cout << value << " " << tmp << " " << dofVector2[Entity.getIndex()] < -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// 
this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 
>()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - 
dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template 
getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - 
al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 
>()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - 
this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template 
getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 
>()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 
>()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h deleted file mode 100644 index d362f249a..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h +++ /dev/null @@ -1,961 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - 
-/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND3D_IMPL_H_ -#define TNLNARROWBAND3D_IMPL_H_ - -#include "tnlNarrowBand.h" - -//__device__ -//double fabsMin( double x, double y) -//{ -// double fx = abs(x); -// -// if(Min(fx,abs(y)) == fx) -// return x; -// else -// return y; -//} -// -//__device__ -//double atomicFabsMin(double* address, double val) -//{ -// unsigned long long int* address_as_ull = -// (unsigned long long int*)address; -// unsigned long long int old = *address_as_ull, assumed; -// do { -// assumed = old; -// old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); -// } while (assumed != old); -// return __longlong_as_double(old); -//} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! 
dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(8, 8,8); - dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 512); - dim3 numBlocks(8,1); - - - runCUDA<<>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - 
cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()] ); - } - - if( k == 0 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()]; - else - { - c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); - - if(hD < 0.0) - tmp = 
fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - int gid = Entity.getIndex(); - - if(abs(cudaDofVector[gid]) < 1.8*h) - cudaDofVector2[gid] = cudaDofVector[gid]; - else - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - - if(blockIdx.x==0) - { - for(int gz = 0; gz < n;gz++) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - 
} - } - } - else if(blockIdx.x==2) - { - - for(int gz = 0; gz < n;gz++) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==3) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - - else if(blockIdx.x==4) - { - for(int gz = n-1; gz > -1;gz--) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==5) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==6) - { - - for(int gz = n-1; gz > -1;gz--) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==7) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - -} - - -__global__ void 
initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gz = blockDim.z*blockIdx.z + threadIdx.z; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) - { - solver->initGrid(gx,gy,gz); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); 
-// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template 
getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template 
getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template 
getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( 
Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template 
getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template 
getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -//} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h deleted file mode 100644 index 6e63d527b..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h +++ /dev/null @@ -1,307 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND3D_IMPL_H_ -#define TNLNARROWBAND3D_IMPL_H_ - -#include "tnlNarrowBand.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." 
<(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; -// cout << "bla "< -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) - { - - if (abs(dofVector[i]) < 1.8*h) - dofVector2[i]=dofVector[i]; - else - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - 
-/*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - - - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - dofVector2.save("u-00001.tnl"); - - cout << "bla 3"< -void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - this->Entity.setCoordinates(CoordinatesType(i,j,k)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter,3> neighborEntities(Entity); - Real value = dofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template 
getEntityIndex< 1, 0, 0>()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()] ); - } - - if( k == 0 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()]; - else - { - c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h deleted file mode 100644 index ca9b1da2c..000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h +++ /dev/null @@ -1,203 +0,0 @@ 
-/*************************************************************************** - tnlNarrowBand_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND_H_ -#define TNLNARROWBAND_H_ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlNarrowBand -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - bool run(); -#ifdef HAVE_CUDA - __device__ __host__ -#endif - RealType positivePart(const RealType arg) const; -#ifdef HAVE_CUDA - __device__ __host__ -#endif - RealType negativePart(const RealType arg) const; - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ 
Real fabsMin(const Real x, const Real y); - - tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int* cudaStatusVector; - int counter; - int* reinitialize; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index i, Index j); - __device__ void setupSquare0010(Index i, Index j); - __device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - int statusGridSize; - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data; - - - RealType h, tau, finalTime; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index 
j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); -//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initSetupGrid1_2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau); -//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - - - -#include "tnlNarrowBand2D_CUDA_v4_impl.h" -// #include "tnlNarrowBand3D_CUDA_impl.h" - -#endif /* TNLNARROWBAND_H_ */ -- GitLab