Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +6 −2 Original line number Diff line number Diff line Loading @@ -104,13 +104,17 @@ template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );*/ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ); #endif #include "tnlDirectEikonalMethodsBase_impl.h" src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +21 −7 Original line number Diff line number Diff line Loading @@ -83,25 +83,30 @@ initInterface( const MeshFunctionType& input, * overit is_same device * na kazdy bod jedno cuda vlakno */ const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const MeshType& mesh = input.getMesh(); const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); //CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); CudaInitCaller<<< gridSize, blockSize >>>( input ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) Loading Loading @@ -597,7 +602,7 @@ __cuda_callable__ void sortMinims( T1 pom[]) } } template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) Loading @@ -606,7 +611,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, int j = blockDim.y*blockIdx.y + threadIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) //if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); Loading Loading @@ -666,4 +671,13 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, } } } }*/ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh(); } src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +7 −13 Original line number Diff line number Diff line Loading @@ -64,17 +64,9 @@ solve( const MeshPointer& mesh, interfaceMap.setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); cudaDeviceSynchronize(); //if( std::is_same< DeviceType, Devices::Cuda >::value ) //{ // Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux; //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost ); //h_aux->save("aux-init-cuda.tnl"); //} //if( std::is_same< DeviceType, Devices::Host >::value ) { aux.save( "aux-ini.tnl" ); } typename MeshType::Cell cell( *mesh ); Loading Loading @@ -217,7 +209,8 @@ solve( const MeshPointer& mesh, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code int numBlocks = 2; #ifdef HAVE_CUDA /*int numBlocks = 2; int threadsPerBlock; if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) threadsPerBlock = (int)( mesh->getDimensions().x() ); Loading @@ -225,7 +218,8 @@ solve( const MeshPointer& mesh, threadsPerBlock = (int)( mesh->getDimensions().y() ); CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); cudaDeviceSynchronize(); //copak dela? cudaDeviceSynchronize(); //copak dela?*/ #endif } iteration++; } Loading Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +6 −2 Original line number Diff line number Diff line Loading @@ -104,13 +104,17 @@ template < typename T1 > __cuda_callable__ void sortMinims( T1 pom[] ); #ifdef HAVE_CUDA template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );*/ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ); #endif #include "tnlDirectEikonalMethodsBase_impl.h"
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +21 −7 Original line number Diff line number Diff line Loading @@ -83,25 +83,30 @@ initInterface( const MeshFunctionType& input, * overit is_same device * na kazdy bod jedno cuda vlakno */ const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const MeshType& mesh = input.getMesh(); const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); //CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); CudaInitCaller<<< gridSize, blockSize >>>( input ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) Loading Loading @@ -597,7 +602,7 @@ __cuda_callable__ void sortMinims( T1 pom[]) } } template < typename Real, typename Device, typename Index > /*template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) Loading @@ -606,7 +611,7 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, int j = blockDim.y*blockIdx.y + threadIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) //if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); Loading Loading @@ -666,4 +671,13 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, } } } }*/ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; //const Meshes::Grid< 2, double, TNL::Devices::Cuda, int >& mesh = input.getMesh(); }
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +7 −13 Original line number Diff line number Diff line Loading @@ -64,17 +64,9 @@ solve( const MeshPointer& mesh, interfaceMap.setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); cudaDeviceSynchronize(); //if( std::is_same< DeviceType, Devices::Cuda >::value ) //{ // Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux; //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost ); //h_aux->save("aux-init-cuda.tnl"); //} //if( std::is_same< DeviceType, Devices::Host >::value ) { aux.save( "aux-ini.tnl" ); } typename MeshType::Cell cell( *mesh ); Loading Loading @@ -217,7 +209,8 @@ solve( const MeshPointer& mesh, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code int numBlocks = 2; #ifdef HAVE_CUDA /*int numBlocks = 2; int threadsPerBlock; if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) threadsPerBlock = (int)( mesh->getDimensions().x() ); Loading @@ -225,7 +218,8 @@ solve( const MeshPointer& mesh, threadsPerBlock = (int)( mesh->getDimensions().y() ); CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); cudaDeviceSynchronize(); //copak dela? cudaDeviceSynchronize(); //copak dela?*/ #endif } iteration++; } Loading