Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +4 −4 Original line number Diff line number Diff line Loading @@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Real *aux, int *BlockIterDevice); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, int *BlockIterDevice, int oddEvenBlock); __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ); /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +6 −6 Original line number Diff line number Diff line Loading @@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading Loading @@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; if ( fabs( tmp ) > 0.01*h ) if ( fabs( tmp ) > 0.001*h ) return true; else return false; Loading Loading @@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +114 −98 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 100 ) : maxIterations( 1 ) { } Loading Loading @@ -250,7 +250,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); Loading @@ -261,7 +261,7 @@ solve( const MeshPointer& mesh, int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); int *dBlock; cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); int oddEvenBlock = 0; while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) Loading @@ -269,19 +269,30 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), dAux, BlockIterDevice ); auxPtr.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); TNL_CHECK_CUDA_DEVICE; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); TNL_CHECK_CUDA_DEVICE; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); //aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); cudaFree( dAux ); cudaFree( BlockIterDevice ); cudaFree( dBlock ); Loading @@ -299,7 +310,7 @@ solve( const MeshPointer& mesh, } #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) { int i = threadIdx.x + blockDim.x*blockIdx.x; Loading @@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; } } }*/ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) { Loading Loading @@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Real *aux, int *BlockIterDevice ) Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, int *BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; Loading Loading @@ -417,6 +428,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } __syncthreads(); if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) { if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) Loading Loading @@ -521,5 +535,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; } } #endif Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +4 −4 Original line number Diff line number Diff line Loading @@ -129,12 +129,12 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Real *aux, int *BlockIterDevice); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, int *BlockIterDevice, int oddEvenBlock); __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ); template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ); /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a );*/ template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +6 −6 Original line number Diff line number Diff line Loading @@ -945,7 +945,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con { sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -957,7 +957,7 @@ updateCell( volatile Real sArray[18][18], int thri, int thrj, const Real hx, con ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading Loading @@ -989,7 +989,7 @@ updateCell( volatile Real sArray[18], int thri, const Real h, const Real v ) sArray[ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thri ]; if ( fabs( tmp ) > 0.01*h ) if ( fabs( tmp ) > 0.001*h ) return true; else return false; Loading Loading @@ -1032,7 +1032,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -1046,7 +1046,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, { sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading @@ -1059,7 +1059,7 @@ updateCell( volatile Real sArray[10][10][10], int thri, int thrj, int thrk, hx * hx * ( b - c ) * ( b - c ) ) )/( hx * hx * hy * hy + hy * hy * hz * hz + hz * hz * hx *hx ); sArray[ thrk ][ thrj ][ thri ] = argAbsMin( value, tmp ); tmp = value - sArray[ thrk ][ thrj ][ thri ]; if ( fabs( tmp ) > 0.01*hx ) if ( fabs( tmp ) > 0.001*hx ) return true; else return false; Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +114 −98 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 100 ) : maxIterations( 1 ) { } Loading Loading @@ -250,7 +250,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //aux1<<< gridSize, blockSize >>>( auxPtr.template modifyData< Device>(), dAux,1 ); //int BlockIter = 1;// = (bool*)malloc( ( numBlocksX * numBlocksY ) * sizeof( bool ) ); Loading @@ -261,7 +261,7 @@ solve( const MeshPointer& mesh, int nBlocks = ( numBlocksX * numBlocksY )/512 + ((( numBlocksX * numBlocksY )%512 != 0) ? 1:0); int *dBlock; cudaMalloc(&dBlock, nBlocks * sizeof( int ) ); int oddEvenBlock = 0; while( BlockIterD ) { /*for( int i = 0; i < numBlocksX * numBlocksY; i++ ) Loading @@ -269,19 +269,30 @@ solve( const MeshPointer& mesh, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), dAux, BlockIterDevice ); auxPtr.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); TNL_CHECK_CUDA_DEVICE; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>(), BlockIterDevice, oddEvenBlock ); TNL_CHECK_CUDA_DEVICE; oddEvenBlock= (oddEvenBlock == 0) ? 1: 0; CudaParallelReduc<<< nBlocks , 512 >>>( BlockIterDevice, dBlock, ( numBlocksX * numBlocksY ) ); TNL_CHECK_CUDA_DEVICE; CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks ); TNL_CHECK_CUDA_DEVICE; cudaMemcpy(&BlockIterD, &dBlock[0], sizeof( int ), cudaMemcpyDeviceToHost); /*for( int i = 1; i < numBlocksX * numBlocksY; i++ ) BlockIter[ 0 ] = BlockIter[ 0 ] || BlockIter[ i ];*/ } aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); //aux1<<<gridSize,blockSize>>>( auxPtr.template modifyData< Device>(), dAux, 0 ); cudaFree( dAux ); cudaFree( BlockIterDevice ); cudaFree( dBlock ); Loading @@ -299,7 +310,7 @@ solve( const MeshPointer& mesh, } #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, Real *dAux, int a ) { int i = threadIdx.x + blockDim.x*blockIdx.x; Loading @@ -314,7 +325,7 @@ __global__ void aux1( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, In aux[ j*mesh.getDimensions().x() + i ] = dAux[ j*mesh.getDimensions().x() + i ]; } } }*/ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlocks ) { Loading Loading @@ -366,8 +377,8 @@ __global__ void CudaParallelReduc( int *BlockIterDevice, int *dBlock, int nBlock template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr, const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Real *aux, int *BlockIterDevice ) Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux, int *BlockIterDevice, int oddEvenBlock ) { int thri = threadIdx.x; int thrj = threadIdx.y; int blIdx = blockIdx.x; int blIdy = blockIdx.y; Loading Loading @@ -417,6 +428,9 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< } __syncthreads(); if( (blIdy%2 + blIdx) % 2 == oddEvenBlock ) { if( thri == 0 ) { if( dimX > (blIdx+1) * blockDim.x && thrj+1 < ykolik ) Loading Loading @@ -521,5 +535,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() && (!interfaceMap[ j * mesh.getDimensions().x() + i ]) ) aux[ j * mesh.getDimensions().x() + i ] = sArray[ thrj + 1 ][ thri + 1 ]; } } #endif