Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +11 −1 Original line number Diff line number Diff line Loading @@ -68,6 +68,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ void updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j ); /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j );*/ }; template< typename Real, Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +229 −2 Original line number Diff line number Diff line Loading @@ -218,7 +218,6 @@ updateCell( MeshFunctionType& u, { const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); const MeshType& mesh = cell.getMesh(); const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y(); const RealType value = u( cell ); Loading Loading @@ -796,3 +795,231 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 } } } template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; int xOd = 0; int yOd = 0; if( blockIdx == 0 ) xOd = 1; if( blockIdy == 0 ) yOd = 1; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; if( dimX > (blockIdx+1) * 16 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; } } if( blockIdx != 0 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; } } if( dimY > (blockIdy+1) * 16 ) { for( int i = xOd; i < xkolik; i++ ) { sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; } } if( blockIdy != 0 ) { for( int i = xOd; i < xkolik; i++ ) { sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; } } /*int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; if( numOfBlockx == 0 || numOfBlocky == 0 ) return; else { if( blockIdx == 0 ) { if( blockIdy == 0 ) { for( int j = 0; j < ykolik-1; j++ ) for( int i = 0; i < xkolik-1; i++ ) { sArray[ j+1 ][ i+1 ] = aux[ j*dimX + i ]; } //vypis( *sArray ); } else { for( int j = 0; j < ykolik; j++ ) for( int i = 0; i < xkolik-1; i++ ) { sArray[ j ][ i+1 ] = aux[ blockIdy*16*dimX - dimX + j*dimX + i ]; } } } else if( blockIdy == 0 ) { for( int j = 0; j < ykolik-1; j++ ) for( int i = 0; i < xkolik; i++ ) { sArray[ j+1 ][ i ] = aux[ blockIdx*16 - 1 + j*dimX + i ]; } } else { for( int j = 0; j < ykolik; j++ ) for( int i = 0; i < xkolik; i++ ) { //if( dimX*dimY-1 > blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ) sArray[ j ][ i ] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ]; } } }*/ } /*template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+2; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+2; if( numOfBlockx == 0 || numOfBlocky == 0 ) return; else { if( blockIdx == 0 ) { if( blockIdy == 0 ) { for( int j = 0; j < ykolik-2; j++ ) for( int i = 0; i < xkolik-2; i++ ) { aux[ j*dimX + i ] = sArray[ j+1 ][ i+1 ]; } } else { for( int j = 1; j < ykolik-1; j++ ) for( int i = 0; i < xkolik-2; i++ ) { aux[ blockIdy*16*dimX - dimX + j*dimX + i ] = sArray[ j ][ i+1 ]; } } } else if( blockIdy == 0 ) { for( int j = 0; j < ykolik-2; j++ ) for( int i = 1; i < xkolik-1; i++ ) { aux[ blockIdx*16 - 1 + j*dimX + i ] = sArray[ j+1 ][ i ]; } } else { for( int j = 1; j < ykolik-1; j++ ) for( int i = 1; i < xkolik-1; i++ ) { aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ] = sArray[ j ][ i ]; } } } }*/ template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real v ) { //const Meshes::Grid< 2, Real, Device, Index >& mesh = u.template getMesh< Devices::Cuda >(); /*typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y();*/ const RealType value = sArray[ thrj ][ thri ];//u( cell ); RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); /*if( value != u[ cell.getIndex() ] ) return;*/ b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], sArray[ thrj-1 ][ thri ] ); a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], sArray[ thrj ][ thri-1 ] ); if( fabs( a ) == TypeInfo< RealType >::getMaxValue() && fabs( b ) == TypeInfo< RealType >::getMaxValue() ) return; RealType pom[6] = { a, b, TypeInfo< Real >::getMaxValue(), (RealType)hx, (RealType)hy, 0.0 }; sortMinims( pom ); tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; if( fabs( tmp ) < fabs( pom[ 1 ] ) ) { //u[ cell.getIndex() ]= argAbsMin( value, tmp ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); } else { tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); //u[ cell.getIndex() ]= argAbsMin( value, tmp ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); } } No newline at end of file src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +0 −1 Original line number Diff line number Diff line Loading @@ -99,7 +99,6 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ); protected: const IndexType maxIterations; Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +34 −13 Original line number Diff line number Diff line Loading @@ -221,11 +221,12 @@ solve( const MeshPointer& mesh, dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); int DIM = mesh->getDimensions().x(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; for( int k = 0; k < numBlocksX; k++) CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, int nBlockIter = numBlocksX > numBlocksY ? numBlocksX : numBlocksY; nBlockIter = numBlocksX == numBlocksY ? nBlockIter + 1 : nBlockIter; for( int k = 0; k < nBlockIter; k++) CudaUpdateCellCaller<<< gridSize, blockSize, 18 * 18 * sizeof( Real ) >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); Loading @@ -245,24 +246,44 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; int thri = threadIdx.x; int thrj = threadIdx.y; // nelze ke stejnym pristupovat znovu pres threadIdx (vzdy da jine hodnoty) int blIdx = blockIdx.x; int blIdy = blockIdx.y; int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); const Real hx = mesh.getSpaceSteps().x(); const Real hy = mesh.getSpaceSteps().y(); __shared__ Real sArray[ 18 ][ 18 ]; for( int k = 0; k < 18; k++ ) for( int l = 0; l < 18; l++ ) sArray[ k ][ l ] = TypeInfo< Real >::getMaxValue(); __syncthreads(); /*//filling shared array ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); __syncthreads();*/ if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); //tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; for( int k = 0; k < 16; k++ ) sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); //fill edges of sArray __syncthreads(); if( ! interfaceMap[ j*mesh.getDimensions().x() + i ] ) { if( ! interfaceMap( cell ) ) for( int k = 0; k < 17; k++ ) { ptr.updateCell( aux, cell ); ptr.updateCell( sArray, thri+1, thrj+1, hx, hy ); __syncthreads(); } } /*for( int k = 0; k < mesh.getDimensions().x(); k++) for( int l = 0; l < mesh.getDimensions().y(); l++) aux[ k*mesh.getDimensions().x() + l ] = TypeInfo< Real >::getMaxValue();*/ aux[j*mesh.getDimensions().x() + i] = sArray[thrj+1][thri+1]; __syncthreads(); } //ptr.getsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); } //#endif No newline at end of file src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +3 −3 Original line number Diff line number Diff line Loading @@ -21,7 +21,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 2 ) : maxIterations( 1 ) { } Loading Loading @@ -257,7 +257,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; for( int k = 0; k < numBlocksX; k++) CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); Loading Loading @@ -291,7 +291,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; cell.refresh(); //tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; for( int l = 0; l < 8; l++ ) for( int l = 0; l < 10; l++ ) { if( ! interfaceMap( cell ) ) { Loading Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +11 −1 Original line number Diff line number Diff line Loading @@ -68,6 +68,16 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, const RealType velocity = 1.0 ); __cuda_callable__ void updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real velocity = 1.0 ); __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j ); /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int i, int j );*/ }; template< typename Real, Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +229 −2 Original line number Diff line number Diff line Loading @@ -218,7 +218,6 @@ updateCell( MeshFunctionType& u, { const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); const MeshType& mesh = cell.getMesh(); const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y(); const RealType value = u( cell ); Loading Loading @@ -796,3 +795,231 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 } } } template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; int xOd = 0; int yOd = 0; if( blockIdx == 0 ) xOd = 1; if( blockIdy == 0 ) yOd = 1; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; if( dimX > (blockIdx+1) * 16 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; } } if( blockIdx != 0 ) { for( int i = yOd; i < ykolik; i++ ) { sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; } } if( dimY > (blockIdy+1) * 16 ) { for( int i = xOd; i < xkolik; i++ ) { sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; } } if( blockIdy != 0 ) { for( int i = xOd; i < xkolik; i++ ) { sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; } } /*int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+1; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+1; if( numOfBlockx == 0 || numOfBlocky == 0 ) return; else { if( blockIdx == 0 ) { if( blockIdy == 0 ) { for( int j = 0; j < ykolik-1; j++ ) for( int i = 0; i < xkolik-1; i++ ) { sArray[ j+1 ][ i+1 ] = aux[ j*dimX + i ]; } //vypis( *sArray ); } else { for( int j = 0; j < ykolik; j++ ) for( int i = 0; i < xkolik-1; i++ ) { sArray[ j ][ i+1 ] = aux[ blockIdy*16*dimX - dimX + j*dimX + i ]; } } } else if( blockIdy == 0 ) { for( int j = 0; j < ykolik-1; j++ ) for( int i = 0; i < xkolik; i++ ) { sArray[ j+1 ][ i ] = aux[ blockIdx*16 - 1 + j*dimX + i ]; } } else { for( int j = 0; j < ykolik; j++ ) for( int i = 0; i < xkolik; i++ ) { //if( dimX*dimY-1 > blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ) sArray[ j ][ i ] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ]; } } }*/ } /*template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) { int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); int xkolik = 18; int ykolik = 18; if( numOfBlockx - 1 == blockIdx ) xkolik = dimX - (numOfBlockx-1)*16+2; if( numOfBlocky -1 == blockIdy ) ykolik = dimY - (numOfBlocky-1)*16+2; if( numOfBlockx == 0 || numOfBlocky == 0 ) return; else { if( blockIdx == 0 ) { if( blockIdy == 0 ) { for( int j = 0; j < ykolik-2; j++ ) for( int i = 0; i < xkolik-2; i++ ) { aux[ j*dimX + i ] = sArray[ j+1 ][ i+1 ]; } } else { for( int j = 1; j < ykolik-1; j++ ) for( int i = 0; i < xkolik-2; i++ ) { aux[ blockIdy*16*dimX - dimX + j*dimX + i ] = sArray[ j ][ i+1 ]; } } } else if( blockIdy == 0 ) { for( int j = 0; j < ykolik-2; j++ ) for( int i = 1; i < xkolik-1; i++ ) { aux[ blockIdx*16 - 1 + j*dimX + i ] = sArray[ j+1 ][ i ]; } } else { for( int j = 1; j < ykolik-1; j++ ) for( int i = 1; i < xkolik-1; i++ ) { aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ] = sArray[ j ][ i ]; } } } }*/ template< typename Real, typename Device, typename Index > __cuda_callable__ void tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, const Real v ) { //const Meshes::Grid< 2, Real, Device, Index >& mesh = u.template getMesh< Devices::Cuda >(); /*typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y();*/ const RealType value = sArray[ thrj ][ thri ];//u( cell ); RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); /*if( value != u[ cell.getIndex() ] ) return;*/ b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], sArray[ thrj-1 ][ thri ] ); a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], sArray[ thrj ][ thri-1 ] ); if( fabs( a ) == TypeInfo< RealType >::getMaxValue() && fabs( b ) == TypeInfo< RealType >::getMaxValue() ) return; RealType pom[6] = { a, b, TypeInfo< Real >::getMaxValue(), (RealType)hx, (RealType)hy, 0.0 }; sortMinims( pom ); tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; if( fabs( tmp ) < fabs( pom[ 1 ] ) ) { //u[ cell.getIndex() ]= argAbsMin( value, tmp ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); } else { tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); //u[ cell.getIndex() ]= argAbsMin( value, tmp ); sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); } } No newline at end of file
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +0 −1 Original line number Diff line number Diff line Loading @@ -99,7 +99,6 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ); protected: const IndexType maxIterations; Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +34 −13 Original line number Diff line number Diff line Loading @@ -221,11 +221,12 @@ solve( const MeshPointer& mesh, dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); int DIM = mesh->getDimensions().x(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; for( int k = 0; k < numBlocksX; k++) CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, int nBlockIter = numBlocksX > numBlocksY ? numBlocksX : numBlocksY; nBlockIter = numBlocksX == numBlocksY ? nBlockIter + 1 : nBlockIter; for( int k = 0; k < nBlockIter; k++) CudaUpdateCellCaller<<< gridSize, blockSize, 18 * 18 * sizeof( Real ) >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); Loading @@ -245,24 +246,44 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; int thri = threadIdx.x; int thrj = threadIdx.y; // nelze ke stejnym pristupovat znovu pres threadIdx (vzdy da jine hodnoty) int blIdx = blockIdx.x; int blIdy = blockIdx.y; int i = thri + blockDim.x*blIdx; int j = blockDim.y*blIdy + thrj; const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); const Real hx = mesh.getSpaceSteps().x(); const Real hy = mesh.getSpaceSteps().y(); __shared__ Real sArray[ 18 ][ 18 ]; for( int k = 0; k < 18; k++ ) for( int l = 0; l < 18; l++ ) sArray[ k ][ l ] = TypeInfo< Real >::getMaxValue(); __syncthreads(); /*//filling shared array ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); __syncthreads();*/ if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); //tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; for( int k = 0; k < 16; k++ ) sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); //fill edges of sArray __syncthreads(); if( ! interfaceMap[ j*mesh.getDimensions().x() + i ] ) { if( ! interfaceMap( cell ) ) for( int k = 0; k < 17; k++ ) { ptr.updateCell( aux, cell ); ptr.updateCell( sArray, thri+1, thrj+1, hx, hy ); __syncthreads(); } } /*for( int k = 0; k < mesh.getDimensions().x(); k++) for( int l = 0; l < mesh.getDimensions().y(); l++) aux[ k*mesh.getDimensions().x() + l ] = TypeInfo< Real >::getMaxValue();*/ aux[j*mesh.getDimensions().x() + i] = sArray[thrj+1][thri+1]; __syncthreads(); } //ptr.getsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); } //#endif No newline at end of file
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +3 −3 Original line number Diff line number Diff line Loading @@ -21,7 +21,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() : maxIterations( 2 ) : maxIterations( 1 ) { } Loading Loading @@ -257,7 +257,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; for( int k = 0; k < numBlocksX; k++) CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); Loading Loading @@ -291,7 +291,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; cell.refresh(); //tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; for( int l = 0; l < 8; l++ ) for( int l = 0; l < 10; l++ ) { if( ! interfaceMap( cell ) ) { Loading