update for TNL changes (c63bfb61) · Commits · TNL / tnl-benchmark-mesh

Makefile

+4 −1

Original line number	Diff line number	Diff line
		@@ -22,7 +22,10 @@ cuda: tnl-benchmark-mesh-cuda
		tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o)
		$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)

		clean:
		clean: clean_templates
		.PHONY: clean_templates
		clean_templates:
		$(RM) tnl-benchmark-mesh tnl-benchmark-mesh-cuda
		$(RM) -r MeshBenchmarks.templates/

		-include $(SOURCES:%.cpp=%.d)

Makefile.base

+13 −2

Original line number	Diff line number	Diff line
		@@ -33,24 +33,35 @@ endif

		## essential host options
		CPPFLAGS = -MD -MP
		CXXFLAGS = -std=c++11 -Wall -Wno-unused-local-typedefs -Wno-unused-variable
		CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-deprecated
		LDFLAGS = -pthread
		LDLIBS =

		ifeq ($(BUILD),Release)
		CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
		#CXXFLAGS += -fopt-info-vec-optimized=explicit.log -ftree-vectorize
		# enable link-time optimization
		ifeq ($(CXX),clang++)
		CXXFLAGS += -flto=thin
		LDFLAGS += -O3 -march=native -mtune=native -flto=thin
		endif
		else ifeq ($(BUILD),Debug)
		CXXFLAGS += -Og -g
		LDFLAGS += -rdynamic
		endif


		## use the LLVM linker with clang
		ifeq ($(CXX),clang++)
		LDFLAGS += -fuse-ld=lld
		endif


		## options for nvcc
		# automatic dependency generation for nvcc (gcc does this by itself during preprocessing when given the -MD flag)
		# note that CUDA_CPPFLAGS is assigned with `=` (recursive expansion), so $@ is substituted only at the point where $(CUDA_CPPFLAGS) is used
		CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@
		CUDA_CXXFLAGS = -std=c++11 --compiler-bindir $(CUDA_HOST_COMPILER)
		CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
		CUDA_LDLIBS =
		# disable false compiler warnings
		# reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910

MeshBenchmarks.h

+50 −44

Original line number	Diff line number	Diff line
		@@ -17,9 +17,11 @@
		#include <TNL/Meshes/Geometry/getEntityCenter.h>
		#include <TNL/Meshes/Geometry/getEntityMeasure.h>
		#include <TNL/Meshes/TypeResolver/TypeResolver.h>
		#include <TNL/DevicePointer.h>
		#include <TNL/ParallelFor.h>
		#include <TNL/StaticFor.h>
		#include <TNL/Pointers/DevicePointer.h>
		#include <TNL/Algorithms/ParallelFor.h>
		#include <TNL/Algorithms/TemplateStaticFor.h>
		#include <TNL/Benchmarks/Benchmarks.h>
		#include <TNL/Communicators/NoDistrCommunicator.h>

		#ifdef HAVE_CUDA
		#include <cuda_profiler_api.h>
		@@ -28,11 +30,10 @@
		#include "MeshOrdering.h"

		#include "MeshConfigs.h"
		#include "tnl_benchmarks.h"

		using namespace TNL;
		using namespace TNL::Meshes;
		using namespace TNL::benchmarks;
		using namespace TNL::Benchmarks;

		template< typename Real >
		__cuda_callable__
		@@ -72,7 +73,7 @@ struct MeshBenchmarks
		Benchmark::MetadataColumns metadataColumns = {
		// {"mesh-file", meshFile},
		{"config", Mesh::Config::getConfigType()},
		{"topology", Mesh::Config::CellTopology::getType().replace("Topologies::", "")},
		{"topology", getType< typename Mesh::Config::CellTopology >().replace("Topologies::", "")},
		// {"wrld dim", worldDimension},
		{"real", getType< typename Mesh::RealType >()},
		{"gid_t", getType< typename Mesh::GlobalIndexType >()},
		@@ -82,7 +83,8 @@ struct MeshBenchmarks
		};

		Mesh mesh;
		if( ! loadMesh( meshFile, mesh ) ) {
		DistributedMeshes::DistributedMesh<Mesh> distributedMesh;
		if( ! loadMesh< Communicators::NoDistrCommunicator >( meshFile, mesh, distributedMesh ) ) {
		std::cerr << "Failed to load mesh from file '" << meshFile << "'." << std::endl;
		return false;
		}
		@@ -121,8 +123,8 @@ struct MeshBenchmarks

		static void dispatchAlgorithms( Benchmark & benchmark, const Mesh & mesh )
		{
		StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh );
		StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh );
		Algorithms::TemplateStaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh );
		Algorithms::TemplateStaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh );
		DualMeasuresDispatch::exec( benchmark, mesh );
		SpheresDispatch::exec( benchmark, mesh );
		}
		@@ -134,7 +136,7 @@ struct MeshBenchmarks
		typename = typename std::enable_if< M::template entitiesAvailable< EntityDimension >() >::type >
		static void exec( Benchmark & benchmark, const M & mesh )
		{
		benchmark.setOperation( String("Centers (d = ") + String(EntityDimension) + ")" );
		benchmark.setOperation( String("Centers (d = ") + convertToString(EntityDimension) + ")" );
		benchmark_centers< EntityDimension, Devices::Host >( benchmark, mesh );
		#ifdef HAVE_CUDA
		benchmark_centers< EntityDimension, Devices::Cuda >( benchmark, mesh );
		@@ -156,7 +158,7 @@ struct MeshBenchmarks
		typename = typename std::enable_if< M::template entitiesAvailable< EntityDimension >() >::type >
		static void exec( Benchmark & benchmark, const M & mesh )
		{
		benchmark.setOperation( String("Measures (d = ") + String(EntityDimension) + ")" );
		benchmark.setOperation( String("Measures (d = ") + convertToString(EntityDimension) + ")" );
		benchmark_measures< EntityDimension, Devices::Host >( benchmark, mesh );
		#ifdef HAVE_CUDA
		benchmark_measures< EntityDimension, Devices::Cuda >( benchmark, mesh );
		@@ -238,9 +240,9 @@ struct MeshBenchmarks
		const Index entitiesCount = mesh_src.template getEntitiesCount< EntityDimension >();

		const DeviceMesh mesh = mesh_src;
		DevicePointer< const DeviceMesh > meshPointer( mesh );
		Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
		Containers::Array< PointType, Device, Index > centers;
		centers.setSize( PointType::size * entitiesCount );
		centers.setSize( PointType::getSize() * entitiesCount );

		auto kernel_measures = [] __cuda_callable__
		( Index i,
		@@ -256,13 +258,14 @@ struct MeshBenchmarks
		};

		auto benchmark_func = [&] () {
		ParallelFor< Device >::exec( (Index) 0, entitiesCount,
		Algorithms::ParallelFor< Device >::exec(
		(Index) 0, entitiesCount,
		kernel_measures,
		&meshPointer.template getData< Device >(),
		centers.getData() );
		};

		benchmark.time( reset,
		benchmark.time< Device >( reset,
		(std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
		benchmark_func );
		}
		@@ -277,7 +280,7 @@ struct MeshBenchmarks
		const Index entitiesCount = mesh_src.template getEntitiesCount< EntityDimension >();

		const DeviceMesh mesh = mesh_src;
		DevicePointer< const DeviceMesh > meshPointer( mesh );
		Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
		Containers::Array< Real, Device, Index > measures;
		measures.setSize( entitiesCount );

		@@ -295,13 +298,14 @@ struct MeshBenchmarks
		};

		auto benchmark_func = [&] () {
		ParallelFor< Device >::exec( (Index) 0, entitiesCount,
		Algorithms::ParallelFor< Device >::exec(
		(Index) 0, entitiesCount,
		kernel_measures,
		&meshPointer.template getData< Device >(),
		measures.getData() );
		};

		benchmark.time( reset,
		benchmark.time< Device >( reset,
		(std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
		benchmark_func );
		}
		@@ -323,7 +327,7 @@ struct MeshBenchmarks
		const Index entitiesCount = mesh_src.template getEntitiesCount< Mesh::getMeshDimension() >();

		const DeviceMesh mesh = mesh_src;
		DevicePointer< const DeviceMesh > meshPointer( mesh );
		Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
		Containers::Array< Real, Device, Index > measures;
		measures.setSize( entitiesCount );

		@@ -360,13 +364,14 @@ struct MeshBenchmarks
		};

		auto benchmark_func = [&] () {
		ParallelFor< Device >::exec( (Index) 0, entitiesCount,
		Algorithms::ParallelFor< Device >::exec(
		(Index) 0, entitiesCount,
		kernel_measures,
		&meshPointer.template getData< Device >(),
		measures.getData() );
		};

		benchmark.time( reset,
		benchmark.time< Device >( reset,
		(std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
		benchmark_func );
		}
		@@ -386,7 +391,7 @@ struct MeshBenchmarks
		const Index entitiesCount = mesh_src.template getEntitiesCount< 0 >();

		const DeviceMesh mesh = mesh_src;
		DevicePointer< const DeviceMesh > meshPointer( mesh );
		Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
		Containers::Array< Real, Device, Index > spheres;
		spheres.setSize( entitiesCount );

		@@ -452,13 +457,14 @@ struct MeshBenchmarks
		};

		auto benchmark_func = [&] () {
		ParallelFor< Device >::exec( (Index) 0, entitiesCount,
		Algorithms::ParallelFor< Device >::exec(
		(Index) 0, entitiesCount,
		kernel_spheres,
		&meshPointer.template getData< Device >(),
		spheres.getData() );
		};

		benchmark.time( reset,
		benchmark.time< Device >( reset,
		(std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
		benchmark_func );
		}

MeshConfigs.h

+3 −26

Original line number	Diff line number	Diff line
		@@ -13,7 +13,6 @@
		#pragma once

		#include <TNL/String.h>
		#include <TNL/param-types.h>
		#include <TNL/Meshes/Topologies/SubentityVertexMap.h>

		template< typename Cell,
		@@ -33,17 +32,6 @@ struct FullConfig
		static constexpr int worldDimension = WorldDimension;
		static constexpr int meshDimension = Cell::dimension;

		static TNL::String getType()
		{
		return TNL::String( "Meshes::FullConfig< " ) +
		Cell::getType() + ", " +
		TNL::String( WorldDimension ) + ", " +
		TNL::getType< Real >() + ", " +
		TNL::getType< GlobalIndex >() + ", " +
		TNL::getType< LocalIndex >() + ", " +
		TNL::getType< Id >() + " >";
		}

		static TNL::String getConfigType()
		{
		return "Full";
		@@ -118,17 +106,6 @@ struct MinimalConfig
		static constexpr int worldDimension = WorldDimension;
		static constexpr int meshDimension = Cell::dimension;

		static TNL::String getType()
		{
		return TNL::String( "Meshes::MinimalConfig< " ) +
		Cell::getType() + ", " +
		TNL::String( WorldDimension ) + ", " +
		TNL::getType< Real >() + ", " +
		TNL::getType< GlobalIndex >() + ", " +
		TNL::getType< LocalIndex >() + ", " +
		TNL::getType< Id >() + " >";
		}

		static TNL::String getConfigType()
		{
		return "Minimal";

MeshOrdering.h

+6 −6

Original line number	Diff line number	Diff line
		@@ -27,7 +27,7 @@ struct KdTreeOrdering
		perm.setSize( numberOfEntities );
		iperm.setSize( numberOfEntities );

		spatial::idle_point_multimap< PointType::size, PointType, IndexType > container;
		spatial::idle_point_multimap< PointType::getSize(), PointType, IndexType > container;

		for( IndexType i = 0; i < numberOfEntities; i++ ) {
		const auto& entity = mesh.template getEntity< MeshEntity >( i );
		@@ -312,7 +312,7 @@ public:
		Vector tmp;
		tmp.setLike( vector );

		ParallelFor< Device >::exec( (IndexType) 0, vector.getSize(),
		Algorithms::ParallelFor< Device >::exec( (IndexType) 0, vector.getSize(),
		kernel,
		vector.getData(),
		tmp.getData(),