Commit c63bfb61 authored by Jakub Klinkovský
Browse files

update for TNL changes

parent e0de6ea6
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -22,7 +22,10 @@ cuda: tnl-benchmark-mesh-cuda
tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o)
	$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)

clean:
clean: clean_templates
.PHONY: clean_templates
clean_templates:
	$(RM) tnl-benchmark-mesh tnl-benchmark-mesh-cuda
	$(RM) -r MeshBenchmarks.templates/

-include $(SOURCES:%.cpp=%.d)
+13 −2
Original line number Diff line number Diff line
@@ -33,24 +33,35 @@ endif

## essential host options
CPPFLAGS = -MD -MP
CXXFLAGS = -std=c++11 -Wall -Wno-unused-local-typedefs -Wno-unused-variable
CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-deprecated
LDFLAGS = -pthread
LDLIBS =

ifeq ($(BUILD),Release)
CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
#CXXFLAGS += -fopt-info-vec-optimized=explicit.log -ftree-vectorize
# enable link-time optimization
ifeq ($(CXX),clang++)
CXXFLAGS += -flto=thin
LDFLAGS += -O3 -march=native -mtune=native -flto=thin
endif
else ifeq ($(BUILD),Debug)
CXXFLAGS += -Og -g
LDFLAGS += -rdynamic
endif


## use the LLVM linker with clang
ifeq ($(CXX),clang++)
LDFLAGS += -fuse-ld=lld
endif


## options for nvcc
# automatic dependency generation for nvcc (gcc has it automated in the pre-processor phase with the -MD flag)
# note that $@ is expanded only when $(CUDA_CPPFLAGS) is used
CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@
CUDA_CXXFLAGS = -std=c++11 --compiler-bindir $(CUDA_HOST_COMPILER)
CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
CUDA_LDLIBS =
# disable false compiler warnings
#   reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910
+50 −44
Original line number Diff line number Diff line
@@ -17,9 +17,11 @@
#include <TNL/Meshes/Geometry/getEntityCenter.h>
#include <TNL/Meshes/Geometry/getEntityMeasure.h>
#include <TNL/Meshes/TypeResolver/TypeResolver.h>
#include <TNL/DevicePointer.h>
#include <TNL/ParallelFor.h>
#include <TNL/StaticFor.h>
#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/TemplateStaticFor.h>
#include <TNL/Benchmarks/Benchmarks.h>
#include <TNL/Communicators/NoDistrCommunicator.h>

#ifdef HAVE_CUDA
#include <cuda_profiler_api.h>
@@ -28,11 +30,10 @@
#include "MeshOrdering.h"

#include "MeshConfigs.h"
#include "tnl_benchmarks.h"

using namespace TNL;
using namespace TNL::Meshes;
using namespace TNL::benchmarks;
using namespace TNL::Benchmarks;

template< typename Real >
__cuda_callable__
@@ -72,7 +73,7 @@ struct MeshBenchmarks
      Benchmark::MetadataColumns metadataColumns = {
//         {"mesh-file", meshFile},
         {"config", Mesh::Config::getConfigType()},
         {"topology", Mesh::Config::CellTopology::getType().replace("Topologies::", "")},
         {"topology", getType< typename Mesh::Config::CellTopology >().replace("Topologies::", "")},
//         {"wrld dim", worldDimension},
         {"real", getType< typename Mesh::RealType >()},
         {"gid_t", getType< typename Mesh::GlobalIndexType >()},
@@ -82,7 +83,8 @@ struct MeshBenchmarks
      };

      Mesh mesh;
      if( ! loadMesh( meshFile, mesh ) ) {
      DistributedMeshes::DistributedMesh<Mesh> distributedMesh;
      if( ! loadMesh< Communicators::NoDistrCommunicator >( meshFile, mesh, distributedMesh ) ) {
         std::cerr << "Failed to load mesh from file '" << meshFile << "'." << std::endl;
         return false;
      }
@@ -121,8 +123,8 @@ struct MeshBenchmarks

   static void dispatchAlgorithms( Benchmark & benchmark, const Mesh & mesh )
   {
      StaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh );
      StaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh );
      Algorithms::TemplateStaticFor< int, 1, Mesh::getMeshDimension() + 1, CentersDispatch >::execHost( benchmark, mesh );
      Algorithms::TemplateStaticFor< int, 1, Mesh::getMeshDimension() + 1, MeasuresDispatch >::execHost( benchmark, mesh );
      DualMeasuresDispatch::exec( benchmark, mesh );
      SpheresDispatch::exec( benchmark, mesh );
   }
@@ -134,7 +136,7 @@ struct MeshBenchmarks
                typename = typename std::enable_if< M::template entitiesAvailable< EntityDimension >() >::type >
      static void exec( Benchmark & benchmark, const M & mesh )
      {
         benchmark.setOperation( String("Centers (d = ") + String(EntityDimension) + ")" );
         benchmark.setOperation( String("Centers (d = ") + convertToString(EntityDimension) + ")" );
         benchmark_centers< EntityDimension, Devices::Host >( benchmark, mesh );
#ifdef HAVE_CUDA
         benchmark_centers< EntityDimension, Devices::Cuda >( benchmark, mesh );
@@ -156,7 +158,7 @@ struct MeshBenchmarks
                typename = typename std::enable_if< M::template entitiesAvailable< EntityDimension >() >::type >
      static void exec( Benchmark & benchmark, const M & mesh )
      {
         benchmark.setOperation( String("Measures (d = ") + String(EntityDimension) + ")" );
         benchmark.setOperation( String("Measures (d = ") + convertToString(EntityDimension) + ")" );
         benchmark_measures< EntityDimension, Devices::Host >( benchmark, mesh );
#ifdef HAVE_CUDA
         benchmark_measures< EntityDimension, Devices::Cuda >( benchmark, mesh );
@@ -238,9 +240,9 @@ struct MeshBenchmarks
      const Index entitiesCount = mesh_src.template getEntitiesCount< EntityDimension >();

      const DeviceMesh mesh = mesh_src;
      DevicePointer< const DeviceMesh > meshPointer( mesh );
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      Containers::Array< PointType, Device, Index > centers;
      centers.setSize( PointType::size * entitiesCount );
      centers.setSize( PointType::getSize() * entitiesCount );

      auto kernel_measures = [] __cuda_callable__
         ( Index i,
@@ -256,13 +258,14 @@ struct MeshBenchmarks
      };

      auto benchmark_func = [&] () {
         ParallelFor< Device >::exec( (Index) 0, entitiesCount,
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_measures,
               &meshPointer.template getData< Device >(),
               centers.getData() );
      };

      benchmark.time( reset,
      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }
@@ -277,7 +280,7 @@ struct MeshBenchmarks
      const Index entitiesCount = mesh_src.template getEntitiesCount< EntityDimension >();

      const DeviceMesh mesh = mesh_src;
      DevicePointer< const DeviceMesh > meshPointer( mesh );
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      Containers::Array< Real, Device, Index > measures;
      measures.setSize( entitiesCount );

@@ -295,13 +298,14 @@ struct MeshBenchmarks
      };

      auto benchmark_func = [&] () {
         ParallelFor< Device >::exec( (Index) 0, entitiesCount,
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_measures,
               &meshPointer.template getData< Device >(),
               measures.getData() );
      };

      benchmark.time( reset,
      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }
@@ -323,7 +327,7 @@ struct MeshBenchmarks
      const Index entitiesCount = mesh_src.template getEntitiesCount< Mesh::getMeshDimension() >();

      const DeviceMesh mesh = mesh_src;
      DevicePointer< const DeviceMesh > meshPointer( mesh );
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      Containers::Array< Real, Device, Index > measures;
      measures.setSize( entitiesCount );

@@ -360,13 +364,14 @@ struct MeshBenchmarks
      };

      auto benchmark_func = [&] () {
         ParallelFor< Device >::exec( (Index) 0, entitiesCount,
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_measures,
               &meshPointer.template getData< Device >(),
               measures.getData() );
      };

      benchmark.time( reset,
      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }
@@ -386,7 +391,7 @@ struct MeshBenchmarks
      const Index entitiesCount = mesh_src.template getEntitiesCount< 0 >();

      const DeviceMesh mesh = mesh_src;
      DevicePointer< const DeviceMesh > meshPointer( mesh );
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      Containers::Array< Real, Device, Index > spheres;
      spheres.setSize( entitiesCount );

@@ -452,13 +457,14 @@ struct MeshBenchmarks
      };

      auto benchmark_func = [&] () {
         ParallelFor< Device >::exec( (Index) 0, entitiesCount,
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_spheres,
               &meshPointer.template getData< Device >(),
               spheres.getData() );
      };

      benchmark.time( reset,
      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }
+3 −26
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@
#pragma once

#include <TNL/String.h>
#include <TNL/param-types.h>
#include <TNL/Meshes/Topologies/SubentityVertexMap.h>

template< typename Cell,
@@ -33,17 +32,6 @@ struct FullConfig
   static constexpr int worldDimension = WorldDimension;
   static constexpr int meshDimension = Cell::dimension;

   // Returns a textual description of this configuration in the form
   // "Meshes::FullConfig< <cell>, <world dimension>, <real>, <global index>, <local index>, <id> >".
   // The cell part comes from Cell::getType(); the remaining type names are
   // obtained through TNL::getType<>() for Real, GlobalIndex, LocalIndex and Id.
   static TNL::String getType()
   {
      return TNL::String( "Meshes::FullConfig< " ) +
             Cell::getType() + ", " +
             // WorldDimension is an integer, so it is converted via the TNL::String constructor
             TNL::String( WorldDimension ) + ", " +
             TNL::getType< Real >() + ", " +
             TNL::getType< GlobalIndex >() + ", " +
             TNL::getType< LocalIndex >() + ", " +
             TNL::getType< Id >() + " >";
   }

   static TNL::String getConfigType()
   {
      return "Full";
@@ -118,17 +106,6 @@ struct MinimalConfig
   static constexpr int worldDimension = WorldDimension;
   static constexpr int meshDimension = Cell::dimension;

   // Returns a textual description of this configuration in the form
   // "Meshes::MinimalConfig< <cell>, <world dimension>, <real>, <global index>, <local index>, <id> >".
   // The cell part comes from Cell::getType(); the remaining type names are
   // obtained through TNL::getType<>() for Real, GlobalIndex, LocalIndex and Id.
   static TNL::String getType()
   {
      return TNL::String( "Meshes::MinimalConfig< " ) +
             Cell::getType() + ", " +
             // WorldDimension is an integer, so it is converted via the TNL::String constructor
             TNL::String( WorldDimension ) + ", " +
             TNL::getType< Real >() + ", " +
             TNL::getType< GlobalIndex >() + ", " +
             TNL::getType< LocalIndex >() + ", " +
             TNL::getType< Id >() + " >";
   }

   static TNL::String getConfigType()
   {
      return "Minimal";
+6 −6
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ struct KdTreeOrdering
        perm.setSize( numberOfEntities );
        iperm.setSize( numberOfEntities );

        spatial::idle_point_multimap< PointType::size, PointType, IndexType > container;
        spatial::idle_point_multimap< PointType::getSize(), PointType, IndexType > container;

        for( IndexType i = 0; i < numberOfEntities; i++ ) {
            const auto& entity = mesh.template getEntity< MeshEntity >( i );
@@ -312,7 +312,7 @@ public:
        Vector tmp;
        tmp.setLike( vector );

        ParallelFor< Device >::exec( (IndexType) 0, vector.getSize(),
        Algorithms::ParallelFor< Device >::exec( (IndexType) 0, vector.getSize(),
                                                 kernel,
                                                 vector.getData(),
                                                 tmp.getData(),
Loading