Loading src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +13 −2 Original line number Diff line number Diff line Loading @@ -49,7 +49,6 @@ template< typename Real, class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { public: typedef Meshes::Grid< 2, Real, Device, Index > MeshType; typedef Real RealType; typedef Device DevcieType; Loading @@ -65,6 +64,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, const RealType velocity = 1.0 ); protected: }; template< typename Real, Loading Loading @@ -100,6 +101,16 @@ template < typename T1, typename T2 > T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double v = 1); template < typename T1 > void sortMinims( T1 pom[] ); __cuda_callable__ void sortMinims( T1 pom[] ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); #include "tnlDirectEikonalMethodsBase_impl.h" src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +185 −90 Original line number Diff line number Diff line Loading @@ -8,7 +8,9 @@ #pragma once #include <TNL/TypeInfo.h> #include <iostream> #include "tnlFastSweepingMethod.h" template< typename Real, typename Device, typename Index > Loading Loading @@ -67,7 +69,6 @@ updateCell( MeshFunctionType& u, { } template< typename Real, typename Device, typename Index > Loading @@ -77,9 +78,30 @@ initInterface( const MeshFunctionType& input, MeshFunctionType& output, InterfaceMapType& interfaceMap ) { /* doplnit přepočty pro cudu: * overit is_same device * na kazdy bod jedno cuda vlakno */ const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) Loading Loading @@ -112,7 +134,7 @@ initInterface( const MeshFunctionType& input, const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); //Try init with exact data: if( c * input[ n ] <= 0 ) /*if( c * input[ n ] <= 0 ) { output[ cell.getIndex() ] = c; output[ n ] = input[ n ]; Loading @@ -125,39 +147,40 @@ initInterface( const MeshFunctionType& input, output[ e ] = input[ e ]; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; } /*if( c * input[ n ] <= 0 ) { if( c >= 0 ) }*/ if( c * input[ n ] <= 0 ) { pom = ( hy * c )/( c - input[ n ]); if( output[ cell.getIndex() ] > pom ) /*if( c >= 0 ) {*/ pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; if( output[ n ] < pom - hy ) output[ n ] = pom - hy; //( hy * c )/( c - input[ n ]) - hy; }else pom = pom - TNL::sign( c )*hy; if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; /*}else { pom = - ( hy * c )/( c - input[ n ]); if( output[ cell.getIndex() ] < pom ) output[ cell.getIndex() ] = pom; if( output[ n ] > hy + pom ) output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); } }*/ interfaceMap[ cell.getIndex() ] = true; interfaceMap[ n ] = true; } if( c * input[ e ] <= 0 ) { if( c >= 0 ) { pom = ( hx * c )/( c - input[ e ]); if( output[ cell.getIndex() ] > pom ) /*if( c >= 0 ) {*/ pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( output[ e ] < pom ) pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) output[ e ] = pom; }else /*}else { pom = - (hx * c)/( c - input[ e ]); if( output[ cell.getIndex() ] < pom ) Loading @@ -166,10 +189,11 @@ initInterface( const MeshFunctionType& input, pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); if( output[ e ] > pom ) output[ e ] = pom; } }*/ interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; }*/ } } } } } Loading @@ -190,7 +214,7 @@ updateCell( MeshFunctionType& u, const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y(); const RealType value = u( cell ); RealType a, b, tmp1, tmp = TypeInfo< RealType >::getMaxValue(); RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); if( cell.getCoordinates().x() == 0 ) a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; Loading Loading @@ -538,7 +562,7 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double } template < typename T1 > void sortMinims( T1 pom[]) __cuda_callable__ void sortMinims( T1 pom[]) { T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){ Loading Loading @@ -572,3 +596,74 @@ void sortMinims( T1 pom[]) pom[ i ] = tmp[ i ]; } } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); output[ cell.getIndex() ] = input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - TypeInfo< Real >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; const Real& hx = mesh.getSpaceSteps().x(); const Real& hy = mesh.getSpaceSteps().y(); cell.refresh(); const Real& c = input( cell ); if( ! cell.isBoundaryEntity() ) { auto neighbors = cell.getNeighborEntities(); Real pom = 0; const Index e = neighbors.template getEntityIndex< 1, 0 >(); const Index w = neighbors.template getEntityIndex< -1, 0 >(); const Index n = neighbors.template getEntityIndex< 0, 1 >(); const Index s = neighbors.template getEntityIndex< 0, -1 >(); if( c * input[ n ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ e ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ w ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ s ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } } } } No newline at end of file src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +47 −2 Original line number Diff line number Diff line Loading @@ -64,7 +64,17 @@ solve( const MeshPointer& mesh, interfaceMap.setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); //if( std::is_same< DeviceType, Devices::Cuda >::value ) //{ // Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux; //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost ); //h_aux->save("aux-init-cuda.tnl"); //} //if( std::is_same< DeviceType, Devices::Host >::value ) { aux.save( "aux-ini.tnl" ); } typename MeshType::Cell cell( *mesh ); Loading Loading @@ -207,10 +217,45 @@ solve( const MeshPointer& mesh, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code } int numBlocks = 2; int threadsPerBlock; if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) threadsPerBlock = (int)( mesh->getDimensions().x() ); else threadsPerBlock = (int)( mesh->getDimensions().y() ); CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); cudaDeviceSynchronize(); //copak dela? } iteration++; } aux.save("aux-final.tnl"); } template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = threadIdx.y + blockDim.y*blockIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { //make cell of aux from index typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); //update cell value few times //for( int i = 0; i < mesh.getDimensions() ; i++ ) //{ cell.refresh(); if( ! interfaceMap( cell ) ) { // tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::updateCell( aux, cell ); } //} } } Loading
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase.h +13 −2 Original line number Diff line number Diff line Loading @@ -49,7 +49,6 @@ template< typename Real, class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > { public: typedef Meshes::Grid< 2, Real, Device, Index > MeshType; typedef Real RealType; typedef Device DevcieType; Loading @@ -65,6 +64,8 @@ class tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > > __cuda_callable__ void updateCell( MeshFunctionType& u, const MeshEntity& cell, const RealType velocity = 1.0 ); protected: }; template< typename Real, Loading Loading @@ -100,6 +101,16 @@ template < typename T1, typename T2 > T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double v = 1); template < typename T1 > void sortMinims( T1 pom[] ); __cuda_callable__ void sortMinims( T1 pom[] ); template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ); template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ); #include "tnlDirectEikonalMethodsBase_impl.h"
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h +185 −90 Original line number Diff line number Diff line Loading @@ -8,7 +8,9 @@ #pragma once #include <TNL/TypeInfo.h> #include <iostream> #include "tnlFastSweepingMethod.h" template< typename Real, typename Device, typename Index > Loading Loading @@ -67,7 +69,6 @@ updateCell( MeshFunctionType& u, { } template< typename Real, typename Device, typename Index > Loading @@ -77,9 +78,30 @@ initInterface( const MeshFunctionType& input, MeshFunctionType& output, InterfaceMapType& interfaceMap ) { /* doplnit přepočty pro cudu: * overit is_same device * na kazdy bod jedno cuda vlakno */ const MeshType& mesh = input.getMesh(); typedef typename MeshType::Cell Cell; Cell cell( mesh ); if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); Devices::Cuda::synchronizeDevice(); CudaInitCaller< Real, Device, Index ><<< gridSize, blockSize >>>( input, output, interfaceMap ); TNL_CHECK_CUDA_DEVICE; #endif } if( std::is_same< Device, Devices::Host >::value ) { for( cell.getCoordinates().y() = 0; cell.getCoordinates().y() < mesh.getDimensions().y(); cell.getCoordinates().y() ++ ) Loading Loading @@ -112,7 +134,7 @@ initInterface( const MeshFunctionType& input, const IndexType e = neighbors.template getEntityIndex< 1, 0 >(); const IndexType n = neighbors.template getEntityIndex< 0, 1 >(); //Try init with exact data: if( c * input[ n ] <= 0 ) /*if( c * input[ n ] <= 0 ) { output[ cell.getIndex() ] = c; output[ n ] = input[ n ]; Loading @@ -125,39 +147,40 @@ initInterface( const MeshFunctionType& input, output[ e ] = input[ e ]; interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; } /*if( c * input[ n ] <= 0 ) { if( c >= 0 ) }*/ if( c * input[ n ] <= 0 ) { pom = ( hy * c )/( c - input[ n ]); if( output[ cell.getIndex() ] > pom ) /*if( c >= 0 ) {*/ pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; if( output[ n ] < pom - hy ) output[ n ] = pom - hy; //( hy * c )/( c - input[ n ]) - hy; }else pom = pom - TNL::sign( c )*hy; if( TNL::abs( output[ n ] ) > TNL::abs( pom ) ) output[ n ] = pom; //( hy * c )/( c - input[ n ]) - hy; /*}else { pom = - ( hy * c )/( c - input[ n ]); if( output[ cell.getIndex() ] < pom ) output[ cell.getIndex() ] = pom; if( output[ n ] > hy + pom ) output[ n ] = hy + pom; //hy - ( hy * c )/( c - input[ n ]); } }*/ interfaceMap[ cell.getIndex() ] = true; interfaceMap[ n ] = true; } if( c * input[ e ] <= 0 ) { if( c >= 0 ) { pom = ( hx * c )/( c - input[ e ]); if( output[ cell.getIndex() ] > pom ) /*if( c >= 0 ) {*/ pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; pom = pom - hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( output[ e ] < pom ) pom = pom - TNL::sign( c )*hx; //output[ e ] = (hx * c)/( c - input[ e ]) - hx; if( TNL::abs( output[ e ] ) > TNL::abs( pom ) ) output[ e ] = pom; }else /*}else { pom = - (hx * c)/( c - input[ e ]); if( output[ cell.getIndex() ] < pom ) Loading @@ -166,10 +189,11 @@ initInterface( const MeshFunctionType& input, pom = pom + hx; //output[ e ] = hx - (hx * c)/( c - input[ e ]); if( output[ e ] > pom ) output[ e ] = pom; } }*/ interfaceMap[ cell.getIndex() ] = true; interfaceMap[ e ] = true; }*/ } } } } } Loading @@ -190,7 +214,7 @@ updateCell( MeshFunctionType& u, const RealType& hx = mesh.getSpaceSteps().x(); const RealType& hy = mesh.getSpaceSteps().y(); const RealType value = u( cell ); RealType a, b, tmp1, tmp = TypeInfo< RealType >::getMaxValue(); RealType a, b, tmp = TypeInfo< RealType >::getMaxValue(); if( cell.getCoordinates().x() == 0 ) a = u[ neighborEntities.template getEntityIndex< 1, 0 >() ]; Loading Loading @@ -538,7 +562,7 @@ T1 meet2DCondition( T1 a, T1 b, const T2 ha, const T2 hb, const T1 value, double } template < typename T1 > void sortMinims( T1 pom[]) __cuda_callable__ void sortMinims( T1 pom[]) { T1 tmp[6] = {0.0,0.0,0.0,0.0,0.0,0.0}; if( fabs(pom[0]) <= fabs(pom[1]) && fabs(pom[1]) <= fabs(pom[2])){ Loading Loading @@ -572,3 +596,74 @@ void sortMinims( T1 pom[]) pom[ i ] = tmp[ i ]; } } template < typename Real, typename Device, typename Index > __global__ void CudaInitCaller( const Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& input, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& output, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = input.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); output[ cell.getIndex() ] = input( cell ) >= 0 ? TypeInfo< Real >::getMaxValue() : - TypeInfo< Real >::getMaxValue(); interfaceMap[ cell.getIndex() ] = false; const Real& hx = mesh.getSpaceSteps().x(); const Real& hy = mesh.getSpaceSteps().y(); cell.refresh(); const Real& c = input( cell ); if( ! cell.isBoundaryEntity() ) { auto neighbors = cell.getNeighborEntities(); Real pom = 0; const Index e = neighbors.template getEntityIndex< 1, 0 >(); const Index w = neighbors.template getEntityIndex< -1, 0 >(); const Index n = neighbors.template getEntityIndex< 0, 1 >(); const Index s = neighbors.template getEntityIndex< 0, -1 >(); if( c * input[ n ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ n ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ e ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ e ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ w ] <= 0 ) { pom = TNL::sign( c )*( hx * c )/( c - input[ w ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } if( c * input[ s ] <= 0 ) { pom = TNL::sign( c )*( hy * c )/( c - input[ s ]); if( TNL::abs( output[ cell.getIndex() ] ) > TNL::abs( pom ) ) output[ cell.getIndex() ] = pom; interfaceMap[ cell.getIndex() ] = true; } } } } No newline at end of file
src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +47 −2 Original line number Diff line number Diff line Loading @@ -64,7 +64,17 @@ solve( const MeshPointer& mesh, interfaceMap.setMesh( mesh ); std::cout << "Initiating the interface cells ..." << std::endl; BaseType::initInterface( u, aux, interfaceMap ); //if( std::is_same< DeviceType, Devices::Cuda >::value ) //{ // Functions::MeshFunction< Meshes::Grid< 2, Real, TNL::Devices::Host, Index > > h_aux; //cudaMemcpy( h_aux, aux, sizeof(MeshFunctionType), cudaMemcpyDeviceToHost ); //h_aux->save("aux-init-cuda.tnl"); //} //if( std::is_same< DeviceType, Devices::Host >::value ) { aux.save( "aux-ini.tnl" ); } typename MeshType::Cell cell( *mesh ); Loading Loading @@ -207,10 +217,45 @@ solve( const MeshPointer& mesh, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // TODO: CUDA code } int numBlocks = 2; int threadsPerBlock; if( mesh->getDimensions().x() >= mesh->getDimensions().y() ) threadsPerBlock = (int)( mesh->getDimensions().x() ); else threadsPerBlock = (int)( mesh->getDimensions().y() ); CudaUpdateCellCaller< Real, Device, Index ><<< numBlocks, threadsPerBlock >>>( interfaceMap, aux ); cudaDeviceSynchronize(); //copak dela? } iteration++; } aux.save("aux-final.tnl"); } template < typename Real, typename Device, typename Index > __global__ void CudaUpdateCellCaller( Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index >, 2, bool >& interfaceMap, Functions::MeshFunction< Meshes::Grid< 2, Real, Device, Index > >& aux ) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = threadIdx.y + blockDim.y*blockIdx.y; const Meshes::Grid< 2, Real, Device, Index >& mesh = aux.getMesh(); if( i < mesh.getDimensions().x() && j < mesh.getDimensions().y() ) { //make cell of aux from index typedef typename Meshes::Grid< 2, Real, Device, Index >::Cell Cell; Cell cell( mesh ); cell.getCoordinates().x() = i; cell.getCoordinates().y() = j; cell.refresh(); //update cell value few times //for( int i = 0; i < mesh.getDimensions() ; i++ ) //{ cell.refresh(); if( ! interfaceMap( cell ) ) { // tnlDirectEikonalMethodsBase< Meshes::Grid< 2, Real, Device, Index > >::updateCell( aux, cell ); } //} } }