diff --git a/src/TNL/Solvers/PDE/ExplicitUpdater.h b/src/TNL/Solvers/PDE/ExplicitUpdater.h index 3c96a914130bfec68fb61317fa6f57f79c063936..d6683b79d54a60c39515f3f231526978a60a3852 100644 --- a/src/TNL/Solvers/PDE/ExplicitUpdater.h +++ b/src/TNL/Solvers/PDE/ExplicitUpdater.h @@ -146,6 +146,9 @@ class ExplicitUpdater { TNL_ASSERT_TRUE( this->userData.boundaryConditions, "The boundary conditions are not correctly set-up. Use method setBoundaryCondtions() to do it." ); + TNL_ASSERT_TRUE( &uPointer.template modifyData< DeviceType >(), + "The function u is not correctly set-up. It was not bound probably with DOFs." ); + this->userData.time = time; this->userData.u = &uPointer.template modifyData< DeviceType >(); Meshes::Traverser< MeshType, EntityType > meshTraverser; diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h index 87f595a3ae73713ef0ae224240ada2dbc2257605..c1d24d0d8523ea79b7f70141800b9b6210b8f1d3 100644 --- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h +++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem.h @@ -3,6 +3,7 @@ #include <TNL/Problems/PDEProblem.h> #include <TNL/Functions/MeshFunction.h> +#include <TNL/Solvers/PDE/ExplicitUpdater.h> #include "Tuning/ExplicitUpdater.h" using namespace TNL; @@ -93,7 +94,8 @@ class HeatEquationBenchmarkProblem: RightHandSide* cudaRightHandSide; DifferentialOperator* cudaDifferentialOperator; - TNL::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater; + TNL::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > tuningExplicitUpdater; + TNL::Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater; }; diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h index bda9ee25df6c31cd4a8db4aecdab668a3a7b62fd..76539e22f7cce06f985c1406aea3c7aabf356f2e 100644 --- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h +++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h @@ -5,9 +5,11 @@ #include <TNL/Matrices/MatrixSetter.h> #include <TNL/Solvers/PDE/LinearSystemAssembler.h> #include <TNL/Solvers/PDE/BackwardTimeDiscretisation.h> +#include <TNL/Solvers/PDE/ExplicitUpdater.h> #include "TestGridEntity.h" #include "Tuning/tunning.h" #include "Tuning/SimpleCell.h" +#include "Tuning/GridTraverser.h" template< typename Mesh, typename BoundaryCondition, @@ -41,7 +43,14 @@ String HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: getPrologHeader() const { - return String( "Heat Equation Benchmark" ); + if( this->cudaKernelType == "pure-c" ) + return String( "Heat Equation Benchmark PURE-C test" ); + if( this->cudaKernelType == "templated" ) + return String( "Heat Equation Benchmark TEMPLATED test" ); + if( this->cudaKernelType == "templated-compact" ) + return String( "Heat Equation Benchmark TEMPLATED COMPACT test" ); + if( this->cudaKernelType == "tunning" ) + return String( "Heat Equation Benchmark TUNNIG test" ); } template< typename Mesh, @@ -78,7 +87,9 @@ setup( const Config::ParameterContainer& parameters, this->cudaRightHandSide = Devices::Cuda::passToDevice( *this->rightHandSidePointer ); this->cudaDifferentialOperator = Devices::Cuda::passToDevice( *this->differentialOperatorPointer ); } - + this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer ); + this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer ); + this->explicitUpdater.setRightHandSide( this->rightHandSidePointer ); return true; } @@ -105,6 +116,7 @@ void HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: bindDofs( DofVectorPointer& dofsPointer ) { + this->u->bind( this->getMesh(), dofsPointer ); } template< typename Mesh, @@ -516,55 +528,66 @@ getExplicitUpdate( const RealType& time, this->u->bind( mesh, uDofs ); this->fu->bind( mesh, fuDofs ); //explicitUpdater.setGPUTransferTimer( this->gpuTransferTimer ); - explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer ); - explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer ); - explicitUpdater.setRightHandSide( this->rightHandSidePointer ); - - //this->explicitUpdater.template update< typename Mesh::Cell >( time, tau, mesh, this->u, this->fu ); + this->explicitUpdater.template update< typename Mesh::Cell >( time, tau, mesh, this->u, this->fu ); } if( this->cudaKernelType == "tunning" ) { if( std::is_same< DeviceType, Devices::Cuda >::value ) { + /*using ExplicitUpdaterType = TNL::Solvers::PDE::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >; + using Cell = typename MeshType::Cell; + using MeshTraverserType = Meshes::Traverser< MeshType, Cell >; + using UserData = TNL::Solvers::PDE::ExplicitUpdaterTraverserUserData< RealType, + MeshFunctionType, + DifferentialOperator, + BoundaryCondition, + RightHandSide >;*/ + + //using CellConfig = Meshes::GridEntityNoStencilStorage; + + using CellConfig = Meshes::GridEntityCrossStencilStorage< 1 >; + using ExplicitUpdaterType = ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >; + using Cell = SimpleCell< Mesh, CellConfig >; + using MeshTraverserType = Traverser< MeshType, Cell >; using UserData = ExplicitUpdaterTraverserUserData< RealType, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >; - using ExplicitUpdaterType = TNL::ExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide >; + using InteriorEntitiesProcessor = typename ExplicitUpdaterType::TraverserInteriorEntitiesProcessor; using BoundaryEntitiesProcessor = typename ExplicitUpdaterType::TraverserBoundaryEntitiesProcessor; - - const IndexType gridXSize = mesh->getDimensions().x(); - const IndexType gridYSize = mesh->getDimensions().y(); - /*const RealType& hx_inv = mesh->template getSpaceStepsProducts< -2, 0 >(); - const RealType& hy_inv = mesh->template getSpaceStepsProducts< 0, -2 >();*/ - - dim3 cudaBlockSize( 16, 16 ); - dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), - gridYSize / 16 + ( gridYSize % 16 != 0 ) ); - - - /*Pointers::SharedPointer< UserData, Devices::Cuda > userDataPtr; - userDataPtr->time = time; - userDataPtr->differentialOperator = &this->differentialOperatorPointer.template getData< Devices::Cuda >(); - userDataPtr->boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >(); - userDataPtr->rightHandSide = NULL; - userDataPtr->u = uDofs->getData(); - userDataPtr->fu = fuDofs->getData();*/ + + this->u->bind( mesh, uDofs ); + this->fu->bind( mesh, fuDofs ); UserData userData; userData.time = time; userData.differentialOperator = &this->differentialOperatorPointer.template getData< Devices::Cuda >(); userData.boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >(); - userData.rightHandSide = NULL; + userData.rightHandSide = &this->rightHandSidePointer.template getData< Devices::Cuda >(); + //userData.u = &this->u.template modifyData< Devices::Cuda >(); //uDofs->getData(); + //userData.fu = &this->fu.template modifyData< Devices::Cuda >(); //fuDofs->getData(); userData.u = uDofs->getData(); userData.fu = fuDofs->getData(); - - + + const IndexType gridXSize = mesh->getDimensions().x(); + const IndexType gridYSize = mesh->getDimensions().y(); + dim3 cudaBlockSize( 16, 16 ); + dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), + gridYSize / 16 + ( gridYSize % 16 != 0 ) ); + TNL::Devices::Cuda::synchronizeDevice(); int cudaErr; + /*Meshes::Traverser< MeshType, Cell > meshTraverser; + meshTraverser.template processBoundaryEntities< UserData, + BoundaryEntitiesProcessor > + ( mesh, + userData ); + */ + + _boundaryConditionsKernel< BoundaryEntitiesProcessor, UserData, MeshType, RealType, IndexType > <<< cudaGridSize, cudaBlockSize >>> ( &mesh.template getData< Devices::Cuda >(), @@ -576,20 +599,17 @@ getExplicitUpdate( const RealType& time, return; } - /**** - * Laplace operator - */ - //cout << "Laplace operator ... " << endl; - /*Meshes::Traverser< MeshType, SimpleCell > meshTraverser - meshTraverser.template processInteriorEntities< UserData, + + /*meshTraverser.template processInteriorEntities< UserData, InteriorEntitiesProcessor > - ( meshPointer, - userDataPointer );*/ + ( mesh, + userData ); + * */ _heatEquationKernel< InteriorEntitiesProcessor, UserData, MeshType, RealType, IndexType > <<< cudaGridSize, cudaBlockSize >>> ( &mesh.template getData< Devices::Cuda >(), userData ); - //&userDataPtr.template modifyData< Devices::Cuda >() ); + //&userDataPtr.template modifyData< Devices::Cuda >() );*/ if( cudaGetLastError() != cudaSuccess ) { std::cerr << "Laplace operator failed." << std::endl; diff --git a/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser.h b/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser.h index 5a0dc7f0b58d9e810a9a6880eea7d91192cf0025..cdbc4922ca07eda7da0ba442340705f6646d8430 100644 --- a/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser.h +++ b/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser.h @@ -19,7 +19,7 @@ namespace TNL { /**** * This is only a helper class for Traverser specializations for Grid. */ -template< typename Grid > +template< typename Grid, typename Cell > class GridTraverser { }; @@ -28,8 +28,9 @@ class GridTraverser * 2D grid, Devices::Host */ template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > > + typename Index, + typename Cell > +class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index >, Cell > { public: @@ -53,7 +54,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > > const GridPointer& gridPointer, const CoordinatesType begin, const CoordinatesType end, - Pointers::SharedPointer< UserData, DeviceType >& userData, + UserData& userData, // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) // const int& stream = 0, const int& stream, @@ -66,8 +67,9 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > > * 2D grid, Devices::Cuda */ template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > > + typename Index, + typename Cell > +class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index >, Cell > { public: @@ -91,7 +93,7 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > > const GridPointer& gridPointer, const CoordinatesType& begin, const CoordinatesType& end, - Pointers::SharedPointer< UserData, DeviceType >& userData, + UserData& userData, // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) // const int& stream = 0, const int& stream, diff --git a/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser_impl.h b/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser_impl.h index 6d1ebe0cfe2f705e4e11430d6b1619f6c37fd0cd..f3d9fbeec528dae97e4f3304f44b8440318d4529 100644 --- a/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser_impl.h +++ b/tests/benchmarks/heat-equation-benchmark/Tuning/GridTraverser_impl.h @@ -22,7 +22,8 @@ namespace TNL { * 2D traverser, host */ template< typename Real, - typename Index > + typename Index, + typename Cell > template< typename GridEntity, typename EntitiesProcessor, @@ -32,12 +33,12 @@ template< typename Real, int YOrthogonalBoundary, typename... GridEntityParameters > void -GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index > >:: +GridTraverser< Meshes::Grid< 2, Real, Devices::Host, Index >, Cell >:: processEntities( const GridPointer& gridPointer, const CoordinatesType begin, const CoordinatesType end, - Pointers::SharedPointer< UserData, DeviceType >& userDataPointer, + UserData& userData, const int& stream, const GridEntityParameters&... gridEntityParameters ) { @@ -52,10 +53,10 @@ processEntities( { entity.getCoordinates().y() = begin.y(); entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity ); + EntitiesProcessor::processEntity( entity.getMesh(), userData, entity ); entity.getCoordinates().y() = end.y(); entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity ); + EntitiesProcessor::processEntity( entity.getMesh(), userData, entity ); } if( XOrthogonalBoundary ) for( entity.getCoordinates().y() = begin.y(); @@ -64,10 +65,10 @@ processEntities( { entity.getCoordinates().x() = begin.x(); entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity ); + EntitiesProcessor::processEntity( entity.getMesh(), userData, entity ); entity.getCoordinates().x() = end.x(); entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity ); + EntitiesProcessor::processEntity( entity.getMesh(), userData, entity ); } } else @@ -98,7 +99,7 @@ processEntities( entity.getCoordinates().x() = x; entity.getCoordinates().y() = y; entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *userDataPointer, entity ); + EntitiesProcessor::processEntity( entity.getMesh(), userData, entity ); } } } @@ -134,9 +135,9 @@ _GridTraverser2D( { //GridEntity entity( *grid, coordinates, gridEntityParameters... ); //entity.refresh(); - if( ! processOnlyBoundaryEntities || + /*if( ! processOnlyBoundaryEntities || ( coordinates.x() == 0 || coordinates.y() == 0 || - coordinates.x() == grid->getDimensions().x() - 1 || coordinates.y() == grid->getDimensions().y() - 1 ) ) + coordinates.x() == grid->getDimensions().x() - 1 || coordinates.y() == grid->getDimensions().y() - 1 ) )*/ //entity.isBoundaryEntity() ) { EntitiesProcessor::processEntity @@ -149,6 +150,7 @@ _GridTraverser2D( } } + template< typename Real, typename Index, typename GridEntity, @@ -157,70 +159,68 @@ template< typename Real, bool processOnlyBoundaryEntities, typename... GridEntityParameters > __global__ void -_GridTraverser2DBoundaryAlongX( +_GridTraverser2DBoundary( const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid, - UserData* userData, + UserData userData, const Index beginX, const Index endX, - const Index fixedY, - const dim3 gridIdx, - const GridEntityParameters... gridEntityParameters ) -{ - typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; - typename GridType::CoordinatesType coordinates; - - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = fixedY; - - if( coordinates.x() <= endX ) - { - GridEntity entity( *grid, coordinates, gridEntityParameters... ); - entity.refresh(); - EntitiesProcessor::processEntity - ( *grid, - *userData, - entity ); - } -} - -template< typename Real, - typename Index, - typename GridEntity, - typename UserData, - typename EntitiesProcessor, - bool processOnlyBoundaryEntities, - typename... GridEntityParameters > -__global__ void -_GridTraverser2DBoundaryAlongY( - const Meshes::Grid< 2, Real, Devices::Cuda, Index >* grid, - UserData* userData, const Index beginY, const Index endY, - const Index fixedX, const dim3 gridIdx, const GridEntityParameters... gridEntityParameters ) { - typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; - typename GridType::CoordinatesType coordinates; - - coordinates.x() = fixedX; - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + using GridType = Meshes::Grid< 2, Real, Devices::Cuda, Index >; + using CoordinatesType = typename GridType::CoordinatesType; - if( coordinates.y() <= endY ) + Index entitiesAlongX = endX - beginX + 1; + Index entitiesAlongY = endY - beginY; + + Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + if( threadId < entitiesAlongX ) { - GridEntity entity( *grid, coordinates, gridEntityParameters... ); + GridEntity entity( *grid, + CoordinatesType( beginX + threadId, beginY ), + gridEntityParameters... ); + //printf( "X1: Thread %d -> %d %d x %d %d \n ", threadId, + // entity.getCoordinates().x(), entity.getCoordinates().y(), + // grid->getDimensions().x(), grid->getDimensions().y() ); entity.refresh(); - EntitiesProcessor::processEntity - ( *grid, - *userData, - entity ); - } + EntitiesProcessor::processEntity( *grid, userData, entity ); + } + else if( ( threadId -= entitiesAlongX ) < entitiesAlongX && threadId >= 0 ) + { + GridEntity entity( *grid, + CoordinatesType( beginX + threadId, endY ), + gridEntityParameters... ); + entity.refresh(); + //printf( "X2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() ); + EntitiesProcessor::processEntity( *grid, userData, entity ); + } + else if( ( ( threadId -= entitiesAlongX ) < entitiesAlongY - 1 ) && threadId >= 0 ) + { + GridEntity entity( *grid, + CoordinatesType( beginX, beginY + threadId + 1 ), + gridEntityParameters... ); + entity.refresh(); + //printf( "Y1: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() ); + EntitiesProcessor::processEntity( *grid, userData, entity ); + } + else if( ( ( threadId -= entitiesAlongY - 1 ) < entitiesAlongY - 1 ) && threadId >= 0 ) + { + GridEntity entity( *grid, + CoordinatesType( endX, beginY + threadId + 1 ), + gridEntityParameters... ); + entity.refresh(); + //printf( "Y2: Thread %d -> %d %d \n ", threadId, entity.getCoordinates().x(), entity.getCoordinates().y() ); + EntitiesProcessor::processEntity( *grid, userData, entity ); + } } #endif template< typename Real, - typename Index > + typename Index, + typename Cell > template< typename GridEntity, typename EntitiesProcessor, @@ -230,12 +230,12 @@ template< typename Real, int YOrthogonalBoundary, typename... GridEntityParameters > void -GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > >:: +GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index >, Cell >:: processEntities( const GridPointer& gridPointer, const CoordinatesType& begin, const CoordinatesType& end, - Pointers::SharedPointer< UserData, DeviceType >& userDataPointer, + UserData& userData, const int& stream, const GridEntityParameters&... gridEntityParameters ) { @@ -243,70 +243,26 @@ processEntities( if( processOnlyBoundaryEntities && ( GridEntity::getEntityDimension() == 2 || GridEntity::getEntityDimension() == 0 ) ) { - dim3 cudaBlockSize( 256 ); - dim3 cudaBlocksCountAlongX, cudaGridsCountAlongX, - cudaBlocksCountAlongY, cudaGridsCountAlongY; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); - - auto& pool = CudaStreamPool::getInstance(); - Devices::Cuda::synchronizeDevice(); - - const cudaStream_t& s1 = pool.getStream( stream ); - const cudaStream_t& s2 = pool.getStream( stream + 1 ); + dim3 cudaBlockSize( 256 ); + dim3 cudaBlocksCount, cudaGridsCount; + IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 ); + Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); dim3 gridIdx, cudaGridSize; - for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ ) + Devices::Cuda::synchronizeDevice(); + for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); - TNL::_GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaGridSize, cudaBlockSize, 0, s1 >>> + Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + _GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > + <<< cudaGridSize, cudaBlockSize >>> ( &gridPointer.template getData< Devices::Cuda >(), - &userDataPointer.template modifyData< Devices::Cuda >(), + userData, begin.x(), end.x(), begin.y(), - gridIdx, - gridEntityParameters... ); - TNL::_GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaGridSize, cudaBlockSize, 0, s2 >>> - ( &gridPointer.template getData< Devices::Cuda >(), - &userDataPointer.template modifyData< Devices::Cuda >(), - begin.x(), - end.x(), end.y(), gridIdx, gridEntityParameters... ); - } - const cudaStream_t& s3 = pool.getStream( stream + 2 ); - const cudaStream_t& s4 = pool.getStream( stream + 3 ); - for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ ) - { - Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); - TNL::_GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaGridSize, cudaBlockSize, 0, s3 >>> - ( &gridPointer.template getData< Devices::Cuda >(), - &userDataPointer.template modifyData< Devices::Cuda >(), - begin.y() + 1, - end.y() - 1, - begin.x(), - gridIdx, - gridEntityParameters... ); - TNL::_GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaGridSize, cudaBlockSize, 0, s4 >>> - ( &gridPointer.template getData< Devices::Cuda >(), - &userDataPointer.template modifyData< Devices::Cuda >(), - begin.y() + 1, - end.y() - 1, - end.x(), - gridIdx, - gridEntityParameters... ); - } - cudaStreamSynchronize( s1 ); - cudaStreamSynchronize( s2 ); - cudaStreamSynchronize( s3 ); - cudaStreamSynchronize( s4 ); - TNL_CHECK_CUDA_DEVICE; + } } else { @@ -329,7 +285,7 @@ processEntities( TNL::_GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), - &userDataPointer.template modifyData< Devices::Cuda >(), + &userData, begin, end, gridIdx, diff --git a/tests/benchmarks/heat-equation-benchmark/Tuning/SimpleCell.h b/tests/benchmarks/heat-equation-benchmark/Tuning/SimpleCell.h index b581ddd57c3336bf828e6583d6520dc868fc4218..67254ab3607c9c318f8d6a624387c5d875ee2484 100644 --- a/tests/benchmarks/heat-equation-benchmark/Tuning/SimpleCell.h +++ b/tests/benchmarks/heat-equation-benchmark/Tuning/SimpleCell.h @@ -10,7 +10,12 @@ #pragma once -template< typename Grid > +#include <TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h> +#include <TNL/Meshes/GridEntityConfig.h> + +#define SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE + +template< typename Grid, typename Config = Meshes::GridEntityNoStencilStorage > class SimpleCell { public: @@ -21,21 +26,34 @@ class SimpleCell typedef typename GridType::IndexType IndexType; typedef typename GridType::CoordinatesType CoordinatesType; typedef typename GridType::PointType PointType; - + typedef SimpleCell< GridType, Config > ThisType; + typedef Meshes::NeighborGridEntitiesStorage< ThisType, Config > + NeighborGridEntitiesStorageType; + typedef Config ConfigType; + constexpr static int getMeshDimension() { return GridType::getMeshDimension(); }; constexpr static int getEntityDimension() { return getMeshDimension(); }; - - typedef SimpleCell< GridType > ThisType; - + __cuda_callable__ inline - SimpleCell( const GridType& grid ) :grid( grid ) {}; + SimpleCell( const GridType& grid ) + :grid( grid ) +#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE + , neighborEntitiesStorage( *this ) +#endif + {}; - /*__cuda_callable__ inline + __cuda_callable__ inline SimpleCell( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ), - const EntityBasisType& basis = EntityBasisType( ( Index ) 1 ) );*/ + const CoordinatesType& orientation = CoordinatesType( ( IndexType ) 0 ), + const CoordinatesType& basis = CoordinatesType( ( IndexType ) 1 ) ) + : grid( grid ), + coordinates( coordinates ) +#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE + , neighborEntitiesStorage( *this ) +#endif + {}; __cuda_callable__ inline const CoordinatesType& getCoordinates() const { return this->coordinates; }; @@ -52,7 +70,11 @@ class SimpleCell * mechanism is a performance. */ __cuda_callable__ inline - void refresh() { this->entityIndex = this->grid.getEntityIndex( *this ); }; + void refresh() + { + this->entityIndex = this->grid.getEntityIndex( *this ); + this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex ); + }; __cuda_callable__ inline IndexType getIndex() const { return this->entityIndex; }; @@ -73,10 +95,17 @@ class SimpleCell __cuda_callable__ inline const NeighborEntities< NeighborEntityDimension >& getNeighborEntities() const; - + */ __cuda_callable__ inline - bool isBoundaryEntity() const; - */ + bool isBoundaryEntity() const + { + return false; + /*return( this->getCoordinates().x() == 0 || + this->getCoordinates().y() == 0 || + this->getCoordinates().x() == this->getMesh().getDimensions().x() - 1 || + this->getCoordinates().y() == this->getMesh().getDimensions().y() - 1 );*/ + }; + __cuda_callable__ inline PointType getCenter() const @@ -102,6 +131,11 @@ class SimpleCell IndexType entityIndex; CoordinatesType coordinates; + +#ifdef SIMPLE_CELL_HAVE_NEIGHBOR_ENTITIES_STORAGE + NeighborGridEntitiesStorageType neighborEntitiesStorage; +#endif + // TODO: Test of boundary entity will likely be more // complicated with MPI. It might be more efficient to resolve it diff --git a/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D.h b/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D.h index bc04fe07db1eebb6157a5b8bd013cd15218689ca..8a5fcbb29a2915cbf45bf39ea9be052e58d7052f 100644 --- a/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D.h +++ b/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D.h @@ -28,17 +28,17 @@ class Traverser template< typename UserData, typename EntitiesProcessor > void processBoundaryEntities( const MeshPointer& meshPointer, - Pointers::SharedPointer< UserData, DeviceType >& userDataPointer ) const; + UserData& userData ) const; template< typename UserData, typename EntitiesProcessor > void processInteriorEntities( const MeshPointer& meshPointer, - Pointers::SharedPointer< UserData, DeviceType >& userDataPointer ) const; + UserData& userData ) const; template< typename UserData, typename EntitiesProcessor > void processAllEntities( const MeshPointer& meshPointer, - Pointers::SharedPointer< UserData, DeviceType >& userDataPointer ) const; + UserData& userData ) const; }; template< typename Real, @@ -55,16 +55,16 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 > template< typename UserData, typename EntitiesProcessor > void processBoundaryEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const; + UserData& userData ) const; template< typename UserData, typename EntitiesProcessor > void processInteriorEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const; + UserData& userData ) const; template< typename UserData, typename EntitiesProcessor > void processAllEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const; + UserData& userData ) const; }; diff --git a/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D_impl.h b/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D_impl.h index 530b133bab357516e34ff12ed9f1412bcce487c6..16fc3a83d980763891adf1a5064054eb215085b0 100644 --- a/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D_impl.h +++ b/tests/benchmarks/heat-equation-benchmark/Tuning/Traverser_Grid2D_impl.h @@ -26,18 +26,18 @@ template< typename Real, void Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >:: processBoundaryEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const + UserData& userData ) const { /**** * Boundary cells */ static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." ); - GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1 >( + GridTraverser< GridType,GridEntity >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1 >( gridPointer, CoordinatesType( 0, 0 ), gridPointer->getDimensions() - CoordinatesType( 1, 1 ), - userDataPointer, + userData, 0 ); } @@ -50,18 +50,18 @@ template< typename Real, void Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >:: processInteriorEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const + UserData& userData ) const { /**** * Interior cells */ static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." ); - GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >( + GridTraverser< GridType, GridEntity >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >( gridPointer, CoordinatesType( 1, 1 ), gridPointer->getDimensions() - CoordinatesType( 2, 2 ), - userDataPointer, + userData, 0 ); } @@ -74,18 +74,18 @@ template< typename Real, void Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >:: processAllEntities( const GridPointer& gridPointer, - Pointers::SharedPointer< UserData, Device >& userDataPointer ) const + UserData& userData ) const { /**** * All cells */ static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." ); - GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >( + GridTraverser< GridType, GridEntity >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >( gridPointer, CoordinatesType( 0, 0 ), gridPointer->getDimensions() - CoordinatesType( 1, 1 ), - userDataPointer, + userData, 0 ); }