Loading benchmarks/MeshBenchmarks.h +122 −3 Original line number Original line Diff line number Diff line Loading @@ -34,6 +34,30 @@ using namespace TNL; using namespace TNL::Meshes; using namespace TNL::Meshes; using namespace TNL::benchmarks; using namespace TNL::benchmarks; template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 1, Real > (& points) [2] ) { return getVectorLength( points[0] - points[1] ); } template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 2, Real > (& points) [3] ) { return getTriangleArea( points[0], points[1] ); } template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 3, Real > (& points) [4] ) { return getTetrahedronVolume( points[0], points[1], points[2] ); } template< typename Mesh > template< typename Mesh > struct MeshBenchmarks struct MeshBenchmarks { { Loading Loading @@ -99,6 +123,7 @@ struct MeshBenchmarks { { StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh ); DualMeasuresDispatch::exec( benchmark, mesh ); SpheresDispatch::exec( benchmark, mesh ); SpheresDispatch::exec( benchmark, mesh ); } } Loading Loading @@ -128,6 +153,35 @@ struct MeshBenchmarks } } }; }; struct DualMeasuresDispatch { template< typename M, typename = typename std::enable_if< std::is_same< typename M::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename M::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename M::Config::CellTopology, Topologies::Tetrahedron >::value >::type > static void exec( Benchmark & benchmark, const M & mesh ) { benchmark.setOperation( "Dual M" ); benchmark_dual_measures< Devices::Host >( benchmark, mesh ); #ifdef HAVE_CUDA benchmark_dual_measures< Devices::Cuda >( benchmark, mesh ); #endif } template< typename M, typename = typename std::enable_if< !( std::is_same< typename M::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename M::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename M::Config::CellTopology, Topologies::Tetrahedron >::value ) >::type, typename = void > static void exec( Benchmark & benchmark, const M & mesh ) { } }; struct SpheresDispatch struct SpheresDispatch { { template< typename M, template< typename M, Loading Loading @@ -234,6 +288,71 @@ struct MeshBenchmarks benchmark_func ); benchmark_func ); } } template< typename Device > static void benchmark_dual_measures( Benchmark & benchmark, const Mesh & mesh_src ) { static_assert( std::is_same< typename Mesh::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename Mesh::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename Mesh::Config::CellTopology, Topologies::Tetrahedron >::value, "The algorithm works only on simplices." ); using Real = typename Mesh::RealType; using Index = typename Mesh::GlobalIndexType; using LocalIndex = typename Mesh::LocalIndexType; using PointType = typename Mesh::PointType; using DeviceMesh = Meshes::Mesh< typename Mesh::Config, Device >; const Index entitiesCount = mesh_src.template getEntitiesCount< Mesh::getMeshDimension() >(); const DeviceMesh mesh = mesh_src; DevicePointer< const DeviceMesh > meshPointer( mesh ); Containers::Array< Real, Device, Index > measures; measures.setSize( entitiesCount ); auto kernel_measures = [] __cuda_callable__ ( Index i, const DeviceMesh* mesh, Real* array ) { const auto& entity = mesh->template getEntity< Mesh::getMeshDimension() >( i ); constexpr auto facesCount = Mesh::Cell::template getSubentitiesCount< Mesh::getMeshDimension() - 1 >(); PointType centers[ facesCount ]; for( LocalIndex f = 0; f < facesCount; f++ ) { const auto fid = entity.template getSubentityIndex< Mesh::getMeshDimension() - 1 >( f ); const auto& face = mesh->template getEntity< Mesh::getMeshDimension() - 1 >( fid ); const auto _cells = face.template getSuperentitiesCount< Mesh::getMeshDimension() >(); if( _cells == 1 ) // boundary face - take the face center instead of the neighbor centers[ f ] = getEntityCenter( *mesh, face ); else for( LocalIndex c = 0; c < _cells; c++ ) { const auto cid = face.template getSuperentityIndex< Mesh::getMeshDimension() >( c ); if( cid != i ) { const auto& cell = mesh->template getEntity< Mesh::getMeshDimension() >( cid ); centers[ f ] = getEntityCenter( *mesh, cell ); } } } array[ i ] = getSimplexMeasure( centers ); }; auto reset = [&]() { measures.setValue( 0.0 ); }; auto benchmark_func = [&] () { ParallelFor< Device >::exec( (Index) 0, entitiesCount, kernel_measures, &meshPointer.template getData< Device >(), measures.getData() ); }; benchmark.time( reset, (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU", benchmark_func ); } template< typename Device > template< typename Device > static void benchmark_spheres( Benchmark & benchmark, const Mesh & mesh_src ) static void benchmark_spheres( Benchmark & benchmark, const Mesh & mesh_src ) { { Loading Loading
benchmarks/MeshBenchmarks.h +122 −3 Original line number Original line Diff line number Diff line Loading @@ -34,6 +34,30 @@ using namespace TNL; using namespace TNL::Meshes; using namespace TNL::Meshes; using namespace TNL::benchmarks; using namespace TNL::benchmarks; template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 1, Real > (& points) [2] ) { return getVectorLength( points[0] - points[1] ); } template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 2, Real > (& points) [3] ) { return getTriangleArea( points[0], points[1] ); } template< typename Real > __cuda_callable__ Real getSimplexMeasure( const TNL::Containers::StaticVector< 3, Real > (& points) [4] ) { return getTetrahedronVolume( points[0], points[1], points[2] ); } template< typename Mesh > template< typename Mesh > struct MeshBenchmarks struct MeshBenchmarks { { Loading Loading @@ -99,6 +123,7 @@ struct MeshBenchmarks { { StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh ); StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh ); DualMeasuresDispatch::exec( benchmark, mesh ); SpheresDispatch::exec( benchmark, mesh ); SpheresDispatch::exec( benchmark, mesh ); } } Loading Loading @@ -128,6 +153,35 @@ struct MeshBenchmarks } } }; }; struct DualMeasuresDispatch { template< typename M, typename = typename std::enable_if< std::is_same< typename M::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename M::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename M::Config::CellTopology, Topologies::Tetrahedron >::value >::type > static void exec( Benchmark & benchmark, const M & mesh ) { benchmark.setOperation( "Dual M" ); benchmark_dual_measures< Devices::Host >( benchmark, mesh ); #ifdef HAVE_CUDA benchmark_dual_measures< Devices::Cuda >( benchmark, mesh ); #endif } template< typename M, typename = typename std::enable_if< !( std::is_same< typename M::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename M::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename M::Config::CellTopology, Topologies::Tetrahedron >::value ) >::type, typename = void > static void exec( Benchmark & benchmark, const M & mesh ) { } }; struct SpheresDispatch struct SpheresDispatch { { template< typename M, template< typename M, Loading Loading @@ -234,6 +288,71 @@ struct MeshBenchmarks benchmark_func ); benchmark_func ); } } template< typename Device > static void benchmark_dual_measures( Benchmark & benchmark, const Mesh & mesh_src ) { static_assert( std::is_same< typename Mesh::Config::CellTopology, Topologies::Edge >::value || std::is_same< typename Mesh::Config::CellTopology, Topologies::Triangle >::value || std::is_same< typename Mesh::Config::CellTopology, Topologies::Tetrahedron >::value, "The algorithm works only on simplices." ); using Real = typename Mesh::RealType; using Index = typename Mesh::GlobalIndexType; using LocalIndex = typename Mesh::LocalIndexType; using PointType = typename Mesh::PointType; using DeviceMesh = Meshes::Mesh< typename Mesh::Config, Device >; const Index entitiesCount = mesh_src.template getEntitiesCount< Mesh::getMeshDimension() >(); const DeviceMesh mesh = mesh_src; DevicePointer< const DeviceMesh > meshPointer( mesh ); Containers::Array< Real, Device, Index > measures; measures.setSize( entitiesCount ); auto kernel_measures = [] __cuda_callable__ ( Index i, const DeviceMesh* mesh, Real* array ) { const auto& entity = mesh->template getEntity< Mesh::getMeshDimension() >( i ); constexpr auto facesCount = Mesh::Cell::template getSubentitiesCount< Mesh::getMeshDimension() - 1 >(); PointType centers[ facesCount ]; for( LocalIndex f = 0; f < facesCount; f++ ) { const auto fid = entity.template getSubentityIndex< Mesh::getMeshDimension() - 1 >( f ); const auto& face = mesh->template getEntity< Mesh::getMeshDimension() - 1 >( fid ); const auto _cells = face.template getSuperentitiesCount< Mesh::getMeshDimension() >(); if( _cells == 1 ) // boundary face - take the face center instead of the neighbor centers[ f ] = getEntityCenter( *mesh, face ); else for( LocalIndex c = 0; c < _cells; c++ ) { const auto cid = face.template getSuperentityIndex< Mesh::getMeshDimension() >( c ); if( cid != i ) { const auto& cell = mesh->template getEntity< Mesh::getMeshDimension() >( cid ); centers[ f ] = getEntityCenter( *mesh, cell ); } } } array[ i ] = getSimplexMeasure( centers ); }; auto reset = [&]() { measures.setValue( 0.0 ); }; auto benchmark_func = [&] () { ParallelFor< Device >::exec( (Index) 0, entitiesCount, kernel_measures, &meshPointer.template getData< Device >(), measures.getData() ); }; benchmark.time( reset, (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU", benchmark_func ); } template< typename Device > template< typename Device > static void benchmark_spheres( Benchmark & benchmark, const Mesh & mesh_src ) static void benchmark_spheres( Benchmark & benchmark, const Mesh & mesh_src ) { { Loading