diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
index e6781a7d647f8f99705c61f6856ea5e492e20b04..ed176f08a92c7ab210ddebe16e3523fa6da56966 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h
@@ -104,13 +104,17 @@ template < typename T1 >
 __cuda_callable__ void sortMinims( T1 pom[] );
 
 
+#ifdef HAVE_CUDA
 template < typename Real, typename Device, typename Index >
 __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
                                       Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux );
 
-template < typename Real, typename Device, typename Index >
+/*template < typename Real, typename Device, typename Index >
 __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
                                 Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
-                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );
+                                Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );*/
+
+__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input );
+#endif
 
 #include "tnlDirectEikonalMethodsBase_impl.h"
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
index 1df41cf804390c5a1531c605811268744158dfe1..8444b6c37801f3a428742760d687cda7a0b9ed1f 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
@@ -83,25 +83,30 @@ initInterface( const MeshFunctionType& input,
      * overit is_same device
      * na kazdy bod jedno cuda vlakno
      */
-    const MeshType& mesh = input.getMesh();
-    typedef typename MeshType::Cell Cell;
-    Cell cell( mesh );
+
     if( std::is_same< Device, Devices::Cuda >::value )
     {
 #ifdef HAVE_CUDA
+        const MeshType& mesh = input.getMesh();
+
         const int cudaBlockSize( 16 );
         int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize );
         int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize );
         dim3 blockSize( cudaBlockSize, cudaBlockSize );
         dim3 gridSize( numBlocksX, numBlocksY );
         Devices::Cuda::synchronizeDevice();
-        CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
+        //CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap );
+        CudaInitCaller<<< gridSize, blockSize >>>( input );
+        cudaDeviceSynchronize();
         TNL_CHECK_CUDA_DEVICE;
 #endif
     }
 
     if( std::is_same< Device, Devices::Host >::value )
     {
+        const MeshType& mesh = input.getMesh();
+        typedef typename MeshType::Cell Cell;
+        Cell cell( mesh );
         for( cell.getCoordinates().y() = 0;
              cell.getCoordinates().y() < mesh.getDimensions().y();
              cell.getCoordinates().y() ++ )
@@ -597,7 +602,7 @@ __cuda_callable__ void sortMinims( T1 pom[])
     }
 }
 
-template < typename Real, typename Device, typename Index >
+/*template < typename Real, typename Device, typename Index >
 __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input,
                                 Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
                                 Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap )
@@ -606,7 +611,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
     int j = blockDim.y*blockIdx.y + threadIdx.y;
     const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh();
 
-    if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
+    //if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
     {
         typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
         Cell cell( mesh );
@@ -666,4 +671,22 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
             }
         }
     }
-}
\ No newline at end of file
+}*/
+
+
+/**
+ * Placeholder CUDA kernel launched by initInterface(); it only computes the
+ * thread's (i, j) index and does not touch `input` yet.
+ *
+ * NOTE(review): `input` is a reference taken in host code -- confirm the
+ * object is device-accessible before re-enabling the getMesh() call.
+ * NOTE(review): a non-template __global__ defined in a header may be emitted
+ * in every including translation unit -- TODO confirm it is included once.
+ */
+__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input )
+{
+    int i = threadIdx.x + blockDim.x*blockIdx.x;
+    int j = blockDim.y*blockIdx.y + threadIdx.y;
+    //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh();
+
+}
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
index 0f939d7554bce7f5bb2d6bd35471442ca792db11..48640e3fc5772c5f629dcaf9c4b66b4a63e9b179 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h
@@ -64,17 +64,11 @@ solve( const MeshPointer& mesh,
    interfaceMap.setMesh( mesh );
    std::cout << "Initiating the interface cells ..." << std::endl;
    BaseType::initInterface( u, aux, interfaceMap );
-
-   //if( std::is_same< DeviceType, Devices::Cuda >::value )
-   //{
-   //   Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux;
-      //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost );
-      //h_aux->save("aux-init-cuda.tnl");
-   //}
-   //if( std::is_same< DeviceType, Devices::Host >::value )
-   {
-      aux.save( "aux-ini.tnl" );
-   }
+#ifdef HAVE_CUDA
+   cudaDeviceSynchronize();
+#endif
+
+   aux.save( "aux-ini.tnl" );
 
    typename MeshType::Cell cell( *mesh );
 
@@ -217,7 +211,8 @@ solve( const MeshPointer& mesh,
       if( std::is_same< DeviceType, Devices::Cuda >::value )
       {
          // TODO: CUDA code
-         int numBlocks = 2;
+#ifdef HAVE_CUDA
+         /*int numBlocks = 2;
          int threadsPerBlock;
          if( mesh->getDimensions().x() >= mesh->getDimensions().y() )
             threadsPerBlock = (int)( mesh->getDimensions().x() );
@@ -225,7 +220,8 @@ solve( const MeshPointer& mesh,
             threadsPerBlock = (int)( mesh->getDimensions().y() );
 
          CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux );
-         cudaDeviceSynchronize(); //copak dela?
+         cudaDeviceSynchronize(); //what does this do?*/
+#endif
       }
       iteration++;
    }