diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h index 138671a3b7c9fa6bae88e2bc6f2651726b3fae5c..69fce1467ff594daef81e35d1c49118ec8252637 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h @@ -67,7 +67,17 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > template< typename MeshEntity > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, - const RealType velocity = 1.0 ); + const RealType velocity = 1.0 ); + + __cuda_callable__ void updateCell( Real sArray[18][18], + int thri, int thrj, const Real hx, const Real hy, + const Real velocity = 1.0 ); + + __cuda_callable__ void setsArray( MeshFunctionType& aux, Real sArray[18][18], + int dimX, int dimY, int i, int j ); + + /*__cuda_callable__ void getsArray( MeshFunctionType& aux, Real sArray[18][18], + int dimX, int dimY, int i, int j );*/ }; template< typename Real, diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h index bf78a02a483083f84e6ded44bc823674b61bb96f..a89d230f6497fc0145712d002c38be1a110157f0 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h @@ -218,7 +218,6 @@ updateCell( MeshFunctionType& u, { const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); const MeshType& mesh = cell.getMesh(); - const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y(); const RealType value = u( cell ); @@ -598,7 +597,7 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double } template < typename T1 > -__cuda_callable__ void sortMinims( T1 pom[]) +__cuda_callable__ void sortMinims( T1 pom[] ) { T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){ @@ -796,3 +795,231 @@ __global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3 } } } + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +setsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) +{ + int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); + int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); + int xkolik = 18; + int ykolik = 18; + int xOd = 0; + int yOd = 0; + + if( blockIdx == 0 ) + xOd = 1; + if( blockIdy == 0 ) + yOd = 1; + + if( numOfBlockx - 1 == blockIdx ) + xkolik = dimX - (numOfBlockx-1)*16+1; + + if( numOfBlocky -1 == blockIdy ) + ykolik = dimY - (numOfBlocky-1)*16+1; + + if( dimX > (blockIdx+1) * 16 ) + { + for( int i = yOd; i < ykolik; i++ ) + { + sArray[i][17] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 17 ]; + } + } + if( blockIdx != 0 ) + { + for( int i = yOd; i < ykolik; i++ ) + { + sArray[i][0] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + i*dimX + 0]; + } + } + if( dimY > (blockIdy+1) * 16 ) + { + for( int i = xOd; i < xkolik; i++ ) + { + sArray[17][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 17*dimX + i ]; + } + } + if( blockIdy != 0 ) + { + for( int i = xOd; i < xkolik; i++ ) + { + sArray[0][i] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + 0*dimX + i ]; + } + } + + /*int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); + int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); + int xkolik = 18; + int ykolik = 18; + + if( numOfBlockx - 1 == blockIdx ) + xkolik = dimX - (numOfBlockx-1)*16+1; + + if( numOfBlocky -1 == blockIdy ) + ykolik = dimY - (numOfBlocky-1)*16+1; + + + if( numOfBlockx == 0 || numOfBlocky == 0 ) + return; + else + { + if( blockIdx == 0 ) + { + if( blockIdy == 0 ) + { + for( int j = 0; j < ykolik-1; j++ ) + for( int i = 0; i < xkolik-1; i++ ) + { + sArray[ j+1 ][ i+1 ] = aux[ j*dimX + i ]; + } + + //vypis( *sArray ); + } + else + { + for( int j = 0; j < ykolik; j++ ) + for( int i = 0; i < xkolik-1; i++ ) + { + sArray[ j ][ i+1 ] = aux[ blockIdy*16*dimX - dimX + j*dimX + i ]; + } + } + } + else if( blockIdy == 0 ) + { + for( int j = 0; j < ykolik-1; j++ ) + for( int i = 0; i < xkolik; i++ ) + { + sArray[ j+1 ][ i ] = aux[ blockIdx*16 - 1 + j*dimX + i ]; + } + } + else + { + for( int j = 0; j < ykolik; j++ ) + for( int i = 0; i < xkolik; i++ ) + { + //if( dimX*dimY-1 > blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ) + sArray[ j ][ i ] = aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ]; + } + } + }*/ +} + +/*template< typename Real, + typename Device, + typename Index > +__cuda_callable__ void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +getsArray( MeshFunctionType& aux, Real sArray[18][18], int dimX, int dimY, int blockIdx, int blockIdy ) +{ + int numOfBlockx = dimX/16 + ((dimX%16 != 0) ? 1:0); + int numOfBlocky = dimY/16 + ((dimY%16 != 0) ? 1:0); + int xkolik = 18; + int ykolik = 18; + + if( numOfBlockx - 1 == blockIdx ) + xkolik = dimX - (numOfBlockx-1)*16+2; + + if( numOfBlocky -1 == blockIdy ) + ykolik = dimY - (numOfBlocky-1)*16+2; + + if( numOfBlockx == 0 || numOfBlocky == 0 ) + return; + else + { + if( blockIdx == 0 ) + { + if( blockIdy == 0 ) + { + for( int j = 0; j < ykolik-2; j++ ) + for( int i = 0; i < xkolik-2; i++ ) + { + aux[ j*dimX + i ] = sArray[ j+1 ][ i+1 ]; + } + } + else + { + for( int j = 1; j < ykolik-1; j++ ) + for( int i = 0; i < xkolik-2; i++ ) + { + aux[ blockIdy*16*dimX - dimX + j*dimX + i ] = sArray[ j ][ i+1 ]; + } + } + } + else if( blockIdy == 0 ) + { + for( int j = 0; j < ykolik-2; j++ ) + for( int i = 1; i < xkolik-1; i++ ) + { + aux[ blockIdx*16 - 1 + j*dimX + i ] = sArray[ j+1 ][ i ]; + } + } + else + { + for( int j = 1; j < ykolik-1; j++ ) + for( int i = 1; i < xkolik-1; i++ ) + { + aux[ blockIdy*16*dimX - dimX + blockIdx*16 - 1 + j*dimX + i ] = sArray[ j ][ i ]; + } + } + } +}*/ + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +void +tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >:: +updateCell( Real sArray[18][18], int thri, int thrj, const Real hx, const Real hy, + const Real v ) +{ + + //const Meshes::Grid< 2, Real, Device, Index >& mesh = u.template getMesh< Devices::Cuda >(); + /*typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; + Cell cell( mesh ); + cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; + cell.refresh(); + + const auto& neighborEntities = cell.template getNeighborEntities< 2 >(); + const RealType& hx = mesh.getSpaceSteps().x(); + const RealType& hy = mesh.getSpaceSteps().y();*/ + const RealType value = sArray[ thrj ][ thri ];//u( cell ); + RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); + + /*if( value != u[ cell.getIndex() ] ) + return;*/ + + b = TNL::argAbsMin( sArray[ thrj+1 ][ thri ], + sArray[ thrj-1 ][ thri ] ); + + a = TNL::argAbsMin( sArray[ thrj ][ thri+1 ], + sArray[ thrj ][ thri-1 ] ); + + + + if( fabs( a ) == TypeInfo< RealType >::getMaxValue() && + fabs( b ) == TypeInfo< RealType >::getMaxValue() ) + return; + + RealType pom[6] = { a, b, TypeInfo< Real >::getMaxValue(), (RealType)hx, (RealType)hy, 0.0 }; + sortMinims( pom ); + tmp = pom[ 0 ] + TNL::sign( value ) * pom[ 3 ]/v; + + + if( fabs( tmp ) < fabs( pom[ 1 ] ) ) + { + //u[ cell.getIndex() ]= argAbsMin( value, tmp ); + sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); + } + else + { + tmp = ( pom[ 3 ] * pom[ 3 ] * pom[ 1 ] + pom[ 4 ] * pom[ 4 ] * pom[ 0 ] + + TNL::sign( value ) * pom[ 3 ] * pom[ 4 ] * TNL::sqrt( ( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] )/( v * v ) - + ( pom[ 1 ] - pom[ 0 ] ) * ( pom[ 1 ] - pom[ 0 ] ) ) )/( pom[ 3 ] * pom[ 3 ] + pom[ 4 ] * pom[ 4 ] ); + //u[ cell.getIndex() ]= argAbsMin( value, tmp ); + sArray[ thrj ][ thri ] = argAbsMin( value, tmp ); + } +} \ No newline at end of file diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h index e44aaca871ee1f9118351c5032b5cb68f3536caf..54e577dacde239171ae299ee62b078bd7a406c35 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h @@ -99,7 +99,6 @@ class FastSweepingMethod< Meshes::Grid< 2, Real, Device, Index >, Anisotropy > const AnisotropyPointer& anisotropy, MeshFunctionPointer& u ); - protected: const IndexType maxIterations; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index 73e170895872d8b1a572031da4ea3baa423de24c..80976772cde6e1d9ffcad92bf8779cea17906e29 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -221,11 +221,12 @@ solve( const MeshPointer& mesh, dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); - int DIM = mesh->getDimensions().x(); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - for( int k = 0; k < numBlocksX; k++) - CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, + int nBlockIter = numBlocksX > numBlocksY ? numBlocksX : numBlocksY; + nBlockIter = numBlocksX == numBlocksY ? nBlockIter + 1 : nBlockIter; + for( int k = 0; k < nBlockIter; k++) + CudaUpdateCellCaller<<< gridSize, blockSize, 18 * 18 * sizeof( Real ) >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); @@ -245,24 +246,44 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; + int thri = threadIdx.x; int thrj = threadIdx.y; // nelze ke stejnym pristupovat znovu pres threadIdx (vzdy da jine hodnoty) + int blIdx = blockIdx.x; int blIdy = blockIdx.y; + int i = thri + blockDim.x*blIdx; + int j = blockDim.y*blIdy + thrj; const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >(); + const Real hx = mesh.getSpaceSteps().x(); + const Real hy = mesh.getSpaceSteps().y(); + + __shared__ Real sArray[ 18 ][ 18 ]; + for( int k = 0; k < 18; k++ ) + for( int l = 0; l < 18; l++ ) + sArray[ k ][ l ] = TypeInfo< Real >::getMaxValue(); + __syncthreads(); + /*//filling shared array + ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); + __syncthreads();*/ if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { - typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; - Cell cell( mesh ); - cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; - cell.refresh(); - //tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr; - for( int k = 0; k < 16; k++ ) + sArray[thrj+1][thri+1] = aux[ j*mesh.getDimensions().x() + i ]; + ptr.setsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); //fill edges of sArray + __syncthreads(); + + + if( ! interfaceMap[ j*mesh.getDimensions().x() + i ] ) { - if( ! interfaceMap( cell ) ) + for( int k = 0; k < 17; k++ ) { - ptr.updateCell( aux, cell ); + ptr.updateCell( sArray, thri+1, thrj+1, hx, hy ); + __syncthreads(); } } + /*for( int k = 0; k < mesh.getDimensions().x(); k++) + for( int l = 0; l < mesh.getDimensions().y(); l++) + aux[ k*mesh.getDimensions().x() + l ] = TypeInfo< Real >::getMaxValue();*/ + aux[j*mesh.getDimensions().x() + i] = sArray[thrj+1][thri+1]; + __syncthreads(); } + //ptr.getsArray( aux, sArray, mesh.getDimensions().x(), mesh.getDimensions().y(), blIdx, blIdy ); } //#endif \ No newline at end of file diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 3e8cb7bb19c054a35a282e09b70d3b38ef6cda6e..5ddd3bdeb039e38ef904a4b574c90bbd4dd89e68 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -21,7 +21,7 @@ template< typename Real, typename Anisotropy > FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod() -: maxIterations( 2 ) +: maxIterations( 1 ) { } @@ -257,7 +257,7 @@ solve( const MeshPointer& mesh, tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; for( int k = 0; k < numBlocksX; k++) - CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr, + CudaUpdateCellCaller<<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template modifyData< Device>() ); cudaDeviceSynchronize(); @@ -291,7 +291,7 @@ __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().z() = k; cell.refresh(); //tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr; - for( int l = 0; l < 8; l++ ) + for( int l = 0; l < 10; l++ ) { if( ! interfaceMap( cell ) ) {