Skip to content
Snippets Groups Projects
Commit 5d1fab6e authored by Matouš Fencl
Browse files

CUDA implemented for exact number of loops in 2D and 3D

parent 6d1a8d98
No related branches found
No related tags found
1 merge request: !1 Hamilton jacobi
...@@ -67,9 +67,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ...@@ -67,9 +67,7 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >
template< typename MeshEntity > template< typename MeshEntity >
__cuda_callable__ void updateCell( MeshFunctionType& u, __cuda_callable__ void updateCell( MeshFunctionType& u,
const MeshEntity& cell, const MeshEntity& cell,
const RealType velocity = 1.0 ); const RealType velocity = 1.0 );
protected:
}; };
template< typename Real, template< typename Real,
...@@ -78,7 +76,6 @@ template< typename Real, ...@@ -78,7 +76,6 @@ template< typename Real,
class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
{ {
public: public:
typedef Meshes::Grid< 3, Real, Device, Index > MeshType; typedef Meshes::Grid< 3, Real, Device, Index > MeshType;
typedef Real RealType; typedef Real RealType;
typedef Device DevcieType; typedef Device DevcieType;
...@@ -96,11 +93,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ...@@ -96,11 +93,6 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > >
__cuda_callable__ void updateCell( MeshFunctionType& u, __cuda_callable__ void updateCell( MeshFunctionType& u,
const MeshEntity& cell, const MeshEntity& cell,
const RealType velocity = 1.0); const RealType velocity = 1.0);
/*Real sort( Real a, Real b, Real c,
const RealType& ha,
const RealType& hb,
const RealType& hc ); */
}; };
template < typename T1, typename T2 > template < typename T1, typename T2 >
...@@ -112,7 +104,8 @@ __cuda_callable__ void sortMinims( T1 pom[] ); ...@@ -112,7 +104,8 @@ __cuda_callable__ void sortMinims( T1 pom[] );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index > template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux );
template < typename Real, typename Device, typename Index > template < typename Real, typename Device, typename Index >
...@@ -120,7 +113,15 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, ...@@ -120,7 +113,15 @@ __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2,
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output,
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap );
//__global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, double, TNL::Devices::Cuda, int > >& input ); template < typename Real, typename Device, typename Index >
__global__ void CudaInitCaller3d( const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& input,
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& output,
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap );
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux );
#endif #endif
#include "tnlDirectEikonalMethodsBase_impl.h" #include "tnlDirectEikonalMethodsBase_impl.h"
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <TNL/SharedPointer.h> #include <TNL/SharedPointer.h>
#include "tnlDirectEikonalMethodsBase.h" #include "tnlDirectEikonalMethodsBase.h"
template< typename Mesh, template< typename Mesh,
typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > > typename Anisotropy = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType > >
class FastSweepingMethod class FastSweepingMethod
...@@ -145,6 +146,7 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy > ...@@ -145,6 +146,7 @@ class FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >
const IndexType maxIterations; const IndexType maxIterations;
}; };
#include "tnlFastSweepingMethod1D_impl.h" #include "tnlFastSweepingMethod1D_impl.h"
#include "tnlFastSweepingMethod2D_impl.h" #include "tnlFastSweepingMethod2D_impl.h"
#include "tnlFastSweepingMethod3D_impl.h" #include "tnlFastSweepingMethod3D_impl.h"
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
#pragma once #pragma once
#include "tnlFastSweepingMethod.h" #include "tnlFastSweepingMethod.h"
#include <TNL/TypeInfo.h>
#include <TNL/Devices/Cuda.h>
template< typename Real, template< typename Real,
typename Device, typename Device,
...@@ -212,15 +215,23 @@ solve( const MeshPointer& mesh, ...@@ -212,15 +215,23 @@ solve( const MeshPointer& mesh,
{ {
// TODO: CUDA code // TODO: CUDA code
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
/*int numBlocks = 2; const int cudaBlockSize( 16 );
int threadsPerBlock; int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
threadsPerBlock = (int)( mesh->getDimensions().x() ); dim3 blockSize( cudaBlockSize, cudaBlockSize );
else dim3 gridSize( numBlocksX, numBlocksY );
threadsPerBlock = (int)( mesh->getDimensions().y() ); Devices::Cuda::synchronizeDevice();
int DIM = mesh->getDimensions().x();
CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;
cudaDeviceSynchronize(); //copak dela?*/ for( int k = 0; k < numBlocksX; k++)
CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr,
interfaceMapPtr.template getData< Device >(),
auxPtr.template modifyData< Device>() );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
aux = *auxPtr;
interfaceMap = *interfaceMapPtr;
#endif #endif
} }
iteration++; iteration++;
...@@ -228,30 +239,30 @@ solve( const MeshPointer& mesh, ...@@ -228,30 +239,30 @@ solve( const MeshPointer& mesh,
aux.save("aux-final.tnl"); aux.save("aux-final.tnl");
} }
//#ifdef HAVE_CUDA
template < typename Real, typename Device, typename Index > template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, __global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr,
const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap,
Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux )
{ {
int i = threadIdx.x + blockDim.x*blockIdx.x; int i = threadIdx.x + blockDim.x*blockIdx.x;
int j = threadIdx.y + blockDim.y*blockIdx.y; int j = blockDim.y*blockIdx.y + threadIdx.y;
const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.getMesh(); const Meshes::Grid< 2, Real, Device, Index >& mesh = interfaceMap.template getMesh< Devices::Cuda >();
if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() )
{ {
//make cell of aux from index
typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell;
Cell cell( mesh ); Cell cell( mesh );
cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.getCoordinates().x() = i; cell.getCoordinates().y() = j;
cell.refresh(); cell.refresh();
//tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > ptr;
//update cell value few times for( int k = 0; k < 16; k++ )
//for( int i = 0; i < mesh.getDimensions() ; i++ ) {
//{
cell.refresh();
if( ! interfaceMap( cell ) ) if( ! interfaceMap( cell ) )
{ {
// tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::updateCell( aux, cell ); ptr.updateCell( aux, cell );
} }
//} }
} }
} }
//#endif
\ No newline at end of file
...@@ -21,7 +21,7 @@ template< typename Real, ...@@ -21,7 +21,7 @@ template< typename Real,
typename Anisotropy > typename Anisotropy >
FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >:: FastSweepingMethod< Meshes::Grid< 3, Real, Device, Index >, Anisotropy >::
FastSweepingMethod() FastSweepingMethod()
: maxIterations( 1 ) : maxIterations( 2 )
{ {
} }
...@@ -64,6 +64,7 @@ solve( const MeshPointer& mesh, ...@@ -64,6 +64,7 @@ solve( const MeshPointer& mesh,
interfaceMapPtr->setMesh( mesh ); interfaceMapPtr->setMesh( mesh );
std::cout << "Initiating the interface cells ..." << std::endl; std::cout << "Initiating the interface cells ..." << std::endl;
BaseType::initInterface( u, auxPtr, interfaceMapPtr ); BaseType::initInterface( u, auxPtr, interfaceMapPtr );
cudaDeviceSynchronize();
auxPtr->save( "aux-ini.tnl" ); auxPtr->save( "aux-ini.tnl" );
typename MeshType::Cell cell( *mesh ); typename MeshType::Cell cell( *mesh );
...@@ -71,172 +72,201 @@ solve( const MeshPointer& mesh, ...@@ -71,172 +72,201 @@ solve( const MeshPointer& mesh,
IndexType iteration( 0 ); IndexType iteration( 0 );
MeshFunctionType aux = *auxPtr; MeshFunctionType aux = *auxPtr;
InterfaceMapType interfaceMap = * interfaceMapPtr; InterfaceMapType interfaceMap = * interfaceMapPtr;
while( iteration < this->maxIterations ) while( iteration < this->maxIterations )
{ {
for( cell.getCoordinates().z() = 0; if( std::is_same< DeviceType, Devices::Host >::value )
cell.getCoordinates().z() < mesh->getDimensions().z(); {
cell.getCoordinates().z()++ ) for( cell.getCoordinates().z() = 0;
{ cell.getCoordinates().z() < mesh->getDimensions().z();
for( cell.getCoordinates().y() = 0; cell.getCoordinates().z()++ )
cell.getCoordinates().y() < mesh->getDimensions().y(); {
cell.getCoordinates().y()++ ) for( cell.getCoordinates().y() = 0;
{ cell.getCoordinates().y() < mesh->getDimensions().y();
for( cell.getCoordinates().x() = 0; cell.getCoordinates().y()++ )
cell.getCoordinates().x() < mesh->getDimensions().x(); {
cell.getCoordinates().x()++ ) for( cell.getCoordinates().x() = 0;
{ cell.getCoordinates().x() < mesh->getDimensions().x();
cell.refresh(); cell.getCoordinates().x()++ )
if( ! interfaceMap( cell ) ) {
this->updateCell( aux, cell ); cell.refresh();
} if( ! interfaceMap( cell ) )
} this->updateCell( aux, cell );
} }
//aux.save( "aux-1.tnl" ); }
}
//aux.save( "aux-1.tnl" );
for( cell.getCoordinates().z() = 0; for( cell.getCoordinates().z() = 0;
cell.getCoordinates().z() < mesh->getDimensions().z(); cell.getCoordinates().z() < mesh->getDimensions().z();
cell.getCoordinates().z()++ ) cell.getCoordinates().z()++ )
{ {
for( cell.getCoordinates().y() = 0; for( cell.getCoordinates().y() = 0;
cell.getCoordinates().y() < mesh->getDimensions().y(); cell.getCoordinates().y() < mesh->getDimensions().y();
cell.getCoordinates().y()++ ) cell.getCoordinates().y()++ )
{ {
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
cell.getCoordinates().x() >= 0 ; cell.getCoordinates().x() >= 0 ;
cell.getCoordinates().x()-- ) cell.getCoordinates().x()-- )
{ {
//std::cerr << "2 -> "; //std::cerr << "2 -> ";
cell.refresh(); cell.refresh();
if( ! interfaceMap( cell ) ) if( ! interfaceMap( cell ) )
this->updateCell( aux, cell ); this->updateCell( aux, cell );
} }
} }
} }
//aux.save( "aux-2.tnl" ); //aux.save( "aux-2.tnl" );
for( cell.getCoordinates().z() = 0; for( cell.getCoordinates().z() = 0;
cell.getCoordinates().z() < mesh->getDimensions().z(); cell.getCoordinates().z() < mesh->getDimensions().z();
cell.getCoordinates().z()++ ) cell.getCoordinates().z()++ )
{ {
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() >= 0 ; cell.getCoordinates().y() >= 0 ;
cell.getCoordinates().y()-- ) cell.getCoordinates().y()-- )
{ {
for( cell.getCoordinates().x() = 0; for( cell.getCoordinates().x() = 0;
cell.getCoordinates().x() < mesh->getDimensions().x(); cell.getCoordinates().x() < mesh->getDimensions().x();
cell.getCoordinates().x()++ ) cell.getCoordinates().x()++ )
{ {
//std::cerr << "3 -> "; //std::cerr << "3 -> ";
cell.refresh(); cell.refresh();
if( ! interfaceMap( cell ) ) if( ! interfaceMap( cell ) )
this->updateCell( aux, cell ); this->updateCell( aux, cell );
} }
} }
} }
//aux.save( "aux-3.tnl" ); //aux.save( "aux-3.tnl" );
for( cell.getCoordinates().z() = 0;
cell.getCoordinates().z() < mesh->getDimensions().z();
cell.getCoordinates().z()++ )
{
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() >= 0;
cell.getCoordinates().y()-- )
{
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
cell.getCoordinates().x() >= 0 ;
cell.getCoordinates().x()-- )
{
//std::cerr << "4 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-4.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = 0;
cell.getCoordinates().y() < mesh->getDimensions().y();
cell.getCoordinates().y()++ )
{
for( cell.getCoordinates().x() = 0;
cell.getCoordinates().x() < mesh->getDimensions().x();
cell.getCoordinates().x()++ )
{
//std::cerr << "5 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-5.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; for( cell.getCoordinates().z() = 0;
cell.getCoordinates().z() >= 0; cell.getCoordinates().z() < mesh->getDimensions().z();
cell.getCoordinates().z()-- ) cell.getCoordinates().z()++ )
{ {
for( cell.getCoordinates().y() = 0; for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() < mesh->getDimensions().y(); cell.getCoordinates().y() >= 0;
cell.getCoordinates().y()++ ) cell.getCoordinates().y()-- )
{ {
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
cell.getCoordinates().x() >= 0 ; cell.getCoordinates().x() >= 0 ;
cell.getCoordinates().x()-- ) cell.getCoordinates().x()-- )
{ {
//std::cerr << "6 -> "; //std::cerr << "4 -> ";
cell.refresh(); cell.refresh();
if( ! interfaceMap( cell ) ) if( ! interfaceMap( cell ) )
this->updateCell( aux, cell ); this->updateCell( aux, cell );
} }
} }
} }
//aux.save( "aux-6.tnl" ); //aux.save( "aux-4.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() >= 0 ;
cell.getCoordinates().y()-- )
{
for( cell.getCoordinates().x() = 0;
cell.getCoordinates().x() < mesh->getDimensions().x();
cell.getCoordinates().x()++ )
{
//std::cerr << "7 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-7.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1; for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0; cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- ) cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = 0;
cell.getCoordinates().y() < mesh->getDimensions().y();
cell.getCoordinates().y()++ )
{
for( cell.getCoordinates().x() = 0;
cell.getCoordinates().x() < mesh->getDimensions().x();
cell.getCoordinates().x()++ )
{
//std::cerr << "5 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-5.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = 0;
cell.getCoordinates().y() < mesh->getDimensions().y();
cell.getCoordinates().y()++ )
{
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
cell.getCoordinates().x() >= 0 ;
cell.getCoordinates().x()-- )
{
//std::cerr << "6 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-6.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() >= 0 ;
cell.getCoordinates().y()-- )
{
for( cell.getCoordinates().x() = 0;
cell.getCoordinates().x() < mesh->getDimensions().x();
cell.getCoordinates().x()++ )
{
//std::cerr << "7 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
//aux.save( "aux-7.tnl" );
for( cell.getCoordinates().z() = mesh->getDimensions().z() - 1;
cell.getCoordinates().z() >= 0;
cell.getCoordinates().z()-- )
{
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1;
cell.getCoordinates().y() >= 0;
cell.getCoordinates().y()-- )
{
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1;
cell.getCoordinates().x() >= 0 ;
cell.getCoordinates().x()-- )
{
//std::cerr << "8 -> ";
cell.refresh();
if( ! interfaceMap( cell ) )
this->updateCell( aux, cell );
}
}
}
}
if( std::is_same< DeviceType, Devices::Cuda >::value )
{ {
for( cell.getCoordinates().y() = mesh->getDimensions().y() - 1; // TODO: CUDA code
cell.getCoordinates().y() >= 0; #ifdef HAVE_CUDA
cell.getCoordinates().y()-- ) const int cudaBlockSize( 8 );
{ int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize );
for( cell.getCoordinates().x() = mesh->getDimensions().x() - 1; int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y(), cudaBlockSize );
cell.getCoordinates().x() >= 0 ; int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z(), cudaBlockSize );
cell.getCoordinates().x()-- ) if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 )
{ std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl;
//std::cerr << "8 -> "; dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize );
cell.refresh(); dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ );
if( ! interfaceMap( cell ) ) Devices::Cuda::synchronizeDevice();
this->updateCell( aux, cell );
} tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr;
} for( int k = 0; k < numBlocksX; k++)
CudaUpdateCellCaller< Real, Device, Index ><<< gridSize, blockSize >>>( ptr,
interfaceMapPtr.template getData< Device >(),
auxPtr.template modifyData< Device>() );
cudaDeviceSynchronize();
TNL_CHECK_CUDA_DEVICE;
aux = *auxPtr;
interfaceMap = *interfaceMapPtr;
#endif
} }
//aux.save( "aux-8.tnl" ); //aux.save( "aux-8.tnl" );
iteration++; iteration++;
...@@ -244,3 +274,29 @@ solve( const MeshPointer& mesh, ...@@ -244,3 +274,29 @@ solve( const MeshPointer& mesh,
aux.save("aux-final.tnl"); aux.save("aux-final.tnl");
} }
/**
 * CUDA kernel that repeatedly applies updateCell() to one cell of a 3D grid.
 *
 * Each thread derives a cell coordinate (x, y, z) from its block and thread
 * indices. Threads whose coordinate lies outside the mesh dimensions return
 * without doing anything. A cell flagged in interfaceMap is left untouched;
 * for every other cell ptr.updateCell( aux, cell ) is invoked a fixed number
 * of times per launch.
 *
 * \param ptr           solver base object (passed by value) whose updateCell() is called
 * \param interfaceMap  read-only flags; a true value excludes the cell from updates
 * \param aux           mesh function handed to updateCell()
 *
 * NOTE(review): no barrier is issued between sweeps, so the order in which
 * different threads' updateCell() calls observe each other's writes to aux is
 * unspecified. Presumably this is intentional for the iterative scheme --
 * confirm against updateCell().
 */
template < typename Real, typename Device, typename Index >
__global__ void CudaUpdateCellCaller( tnlDirectEikonalMethodsBase< Meshes::Grid< 3, Real, Device, Index > > ptr,
                                      const Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index >, 3, bool >& interfaceMap,
                                      Functions::MeshFunction< Meshes::Grid< 3, Real, Device, Index > >& aux )
{
   typedef Meshes::Grid< 3, Real, Device, Index > GridType;
   typedef typename GridType::Cell Cell;

   // Number of updateCell() applications performed by each thread per launch.
   // NOTE(review): equals the block edge length (8) used by the host-side
   // launch visible in this change -- confirm whether the two must stay in sync.
   const int sweepsPerLaunch = 8;

   const int i = blockDim.x * blockIdx.x + threadIdx.x;
   const int j = blockDim.y * blockIdx.y + threadIdx.y;
   const int k = blockDim.z * blockIdx.z + threadIdx.z;

   const GridType& mesh = interfaceMap.template getMesh< Devices::Cuda >();

   // The launch grid is rounded up to whole blocks, so some threads map
   // outside the mesh. The kernel contains no barrier, so returning early is safe.
   if( i >= mesh.getDimensions().x() ||
       j >= mesh.getDimensions().y() ||
       k >= mesh.getDimensions().z() )
      return;

   Cell cell( mesh );
   cell.getCoordinates().x() = i;
   cell.getCoordinates().y() = j;
   cell.getCoordinates().z() = k;
   cell.refresh();

   // interfaceMap is const and the cell does not change, so the flag is
   // evaluated once here instead of once per sweep.
   if( interfaceMap( cell ) )
      return;

   for( int sweep = 0; sweep < sweepsPerLaunch; sweep++ )
      ptr.updateCell( aux, cell );
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment