Commit 12749a09 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Mesh benchmarks moved to the tnl-benchmark-mesh (sub)project

parent 1fb01a57
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -7,4 +7,3 @@ add_subdirectory( LinearSolvers )
#add_subdirectory( ODESolvers )
add_subdirectory( Sorting )
add_subdirectory( Traversers )
add_subdirectory( Mesh )
+0 −35
Original line number Diff line number Diff line
# Build script for the mesh benchmark executables.
#
# The CUDA variant is built only when BUILD_CUDA is set; the host variant is
# always built. Both targets get the same optional dependencies: tinyxml2 and
# zlib. When a dependency is found, the corresponding HAVE_* macro is defined
# for the target and the library is linked.
if( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE( tnl-benchmark-mesh-cuda tnl-benchmark-mesh.cu )
   
   # tinyxml2 is optional: QUIET suppresses the message when it is not found
   find_package( tinyxml2 QUIET )
   if( tinyxml2_FOUND )
      target_compile_definitions(tnl-benchmark-mesh-cuda PUBLIC "-DHAVE_TINYXML2")
      target_link_libraries(tnl-benchmark-mesh-cuda tinyxml2::tinyxml2)
   endif()

   # zlib is optional as well; it is used through the classic ZLIB_* variables
   find_package( ZLIB )
   if( ZLIB_FOUND )
      target_compile_definitions(tnl-benchmark-mesh-cuda PUBLIC "-DHAVE_ZLIB")
      target_include_directories(tnl-benchmark-mesh-cuda PUBLIC ${ZLIB_INCLUDE_DIRS})
      target_link_libraries(tnl-benchmark-mesh-cuda ${ZLIB_LIBRARIES})
   endif()
   
   install( TARGETS tnl-benchmark-mesh-cuda RUNTIME DESTINATION bin )
endif()

# Host-only executable; configured the same way as the CUDA target above
ADD_EXECUTABLE( tnl-benchmark-mesh tnl-benchmark-mesh.cpp )

# optional tinyxml2 support
find_package( tinyxml2 QUIET )
if( tinyxml2_FOUND )
   target_compile_definitions(tnl-benchmark-mesh PUBLIC "-DHAVE_TINYXML2")
   target_link_libraries(tnl-benchmark-mesh tinyxml2::tinyxml2)
endif()

# optional zlib support
find_package( ZLIB )
if( ZLIB_FOUND )
   target_compile_definitions(tnl-benchmark-mesh PUBLIC "-DHAVE_ZLIB")
   target_include_directories(tnl-benchmark-mesh PUBLIC ${ZLIB_INCLUDE_DIRS})
   target_link_libraries(tnl-benchmark-mesh ${ZLIB_LIBRARIES})
endif()

install( TARGETS tnl-benchmark-mesh RUNTIME DESTINATION bin )

src/Benchmarks/Mesh/MemoryInfo.h

deleted100644 → 0
+0 −195
Original line number Diff line number Diff line
#pragma once

// References:
// - https://stackoverflow.com/a/64166/4180822
// - https://lemire.me/blog/2020/03/03/calling-free-or-delete/
// - https://stackoverflow.com/questions/15529643/what-does-malloc-trim0-really-mean

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <sys/types.h>
#include <sys/sysinfo.h>
#include <malloc.h>

#include <cmath>
#include <limits>

// Total virtual memory of the system (RAM + swap) in bytes, as reported by
// sysinfo().
//
// Returns -1 when sysinfo() fails, so that the fields of an unfilled struct
// are never read.
inline long
getTotalVirtualMemory()
{
    struct sysinfo memInfo;
    if (sysinfo(&memInfo) != 0)
        return -1;

    // Accumulate step by step in a long: the sum and the multiplication by
    // mem_unit are then carried out in the result type.
    long totalVirtualMem = memInfo.totalram;
    totalVirtualMem += memInfo.totalswap;
    totalVirtualMem *= memInfo.mem_unit;

    return totalVirtualMem;
}

// Virtual memory currently in use on the whole system (used RAM + used swap)
// in bytes, as reported by sysinfo().
//
// Returns -1 when sysinfo() fails, so that the fields of an unfilled struct
// are never read.
inline long
getUsedVirtualMemory()
{
    struct sysinfo memInfo;
    if (sysinfo(&memInfo) != 0)
        return -1;

    // Accumulate step by step in a long: the sum and the multiplication by
    // mem_unit are then carried out in the result type.
    long virtualMemUsed = memInfo.totalram - memInfo.freeram;
    virtualMemUsed += memInfo.totalswap - memInfo.freeswap;
    virtualMemUsed *= memInfo.mem_unit;

    return virtualMemUsed;
}

// Extracts the first decimal number found in `line`, e.g. a line such as
// "VmRSS:     1234 kB\n" yields 1234.
//
// The scan for the first digit stops at the terminating NUL, so a line that
// contains no digit yields 0 instead of reading past the end of the buffer.
// The buffer is truncated three characters before its end (which drops the
// trailing unit suffix), but only when the line is long enough for that write
// to stay inside the buffer.
inline long
parseLine(char* line)
{
    const size_t length = strlen(line);

    // skip everything up to the first digit (or up to the end of the string)
    const char* p = line;
    while (*p != '\0' && (*p < '0' || *p > '9'))
        p++;

    if (length >= 3)
        line[length - 3] = '\0';

    // atol stops at the first non-digit character and returns 0 for ""
    return atol(p);
}

// Virtual memory currently used by the calling process, in bytes.
//
// The value is taken from the "VmSize:" line of /proc/self/status (reported
// in kB). Returns -1 when the file cannot be opened or the line is not found.
inline long
getSelfVirtualMemory()
{
    // explicitly release unused memory
    malloc_trim(0);

    FILE* file = fopen("/proc/self/status", "r");
    if (file == NULL)
        return -1;

    long result = -1;
    char line[128];

    while (fgets(line, sizeof(line), file) != NULL) {
        if (strncmp(line, "VmSize:", 7) == 0) {
            // convert from kB to B
            result = parseLine(line) * 1024;
            break;
        }
    }
    fclose(file);
    return result;
}

// Total physical memory (RAM) of the system in bytes, as reported by
// sysinfo().
//
// Returns -1 when sysinfo() fails, so that the fields of an unfilled struct
// are never read.
inline long
getTotalPhysicalMemory()
{
    struct sysinfo memInfo;
    if (sysinfo(&memInfo) != 0)
        return -1;

    // Multiply in a separate statement so that the multiplication by mem_unit
    // is carried out in the result type.
    long totalPhysMem = memInfo.totalram;
    totalPhysMem *= memInfo.mem_unit;

    return totalPhysMem;
}

// Physical memory (RAM) currently in use on the whole system in bytes,
// computed as totalram - freeram from sysinfo().
//
// Returns -1 when sysinfo() fails, so that the fields of an unfilled struct
// are never read.
inline long
getUsedPhysicalMemory()
{
    struct sysinfo memInfo;
    if (sysinfo(&memInfo) != 0)
        return -1;

    // Multiply in a separate statement so that the multiplication by mem_unit
    // is carried out in the result type.
    long physMemUsed = memInfo.totalram - memInfo.freeram;
    physMemUsed *= memInfo.mem_unit;

    return physMemUsed;
}

// Physical memory (resident set size) currently used by the calling process,
// in bytes.
//
// The value is taken from the "VmRSS:" line of /proc/self/status (reported
// in kB). Returns -1 when the file cannot be opened or the line is not found.
inline long
getSelfPhysicalMemory()
{
    // explicitly release unused memory
    malloc_trim(0);

    FILE* file = fopen("/proc/self/status", "r");
    if (file == NULL)
        return -1;

    long result = -1;
    char line[128];

    while (fgets(line, sizeof(line), file) != NULL) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            // convert from kB to B
            result = parseLine(line) * 1024;
            break;
        }
    }
    fclose(file);
    return result;
}


#include <TNL/Benchmarks/Benchmarks.h>
#include <TNL/Config/ParameterContainer.h>
#include <TNL/Containers/StaticVector.h>

// Benchmark result extended by two memory columns: the mean memory usage and
// its standard deviation. Both default to NaN until a measurement fills them.
struct MemoryBenchmarkResult
: public TNL::Benchmarks::BenchmarkResult
{
   using HeaderElements = TNL::Benchmarks::Logging::HeaderElements;
   using RowElements = TNL::Benchmarks::Logging::RowElements;

   double memory = std::numeric_limits<double>::quiet_NaN();
   double memstddev = std::numeric_limits<double>::quiet_NaN();

   // Column names; the order matches the values written by getRowElements().
   HeaderElements getTableHeader() const override
   {
      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup", "memory", "memstddev", "memstddev/memory" });
   }

   // One table row: the timing columns, then the speedup (or "N/A" when it
   // is zero), then the memory columns.
   RowElements getRowElements() const override
   {
      RowElements row;
      row << time << stddev << stddev / time << bandwidth;
      if( speedup == 0 )
         row << "N/A";
      else
         row << speedup;
      row << memory << memstddev << memstddev / memory;
      return row;
   }
};

// Estimates the memory footprint of one copy of `mesh`.
//
// Up to MAX_COPIES copies of the mesh are created one by one and the change
// of the resident set size of the process (getSelfPhysicalMemory) is recorded
// after each copy. The number of copies is bounded by the "mem-limit"
// parameter (multiplied by 1024 * 1024 here, i.e. presumably given in MiB -
// confirm against the parameter definition) divided by the footprint of the
// first copy.
//
// Returns a result whose `memory` is the mean footprint in MiB and whose
// `memstddev` is its standard deviation in MiB; `memstddev` stays NaN when
// only one copy was made.
template< long MAX_COPIES = 10, typename Mesh >
MemoryBenchmarkResult
testMemoryUsage( const TNL::Config::ParameterContainer& parameters,
                 const Mesh& mesh )
{
    static_assert( MAX_COPIES >= 1, "At least one mesh copy is needed to take a measurement." );

    const size_t memoryLimit = parameters.getParameter< size_t >( "mem-limit" ) * 1024 * 1024;
    TNL::Containers::StaticVector< MAX_COPIES, Mesh > meshes;
    TNL::Containers::StaticVector< MAX_COPIES, double > data;
    data.setValue( 0 );

    long prevCheck = getSelfPhysicalMemory();
    meshes[0] = mesh;
    long check = getSelfPhysicalMemory();
    data[0] = check - prevCheck;
    prevCheck = check;

    // The first copy is always made. The memory limit restricts the number of
    // additional copies only when the first measurement is positive: a zero
    // or negative delta cannot be used as a divisor and would otherwise lead
    // to a division by zero or to a non-positive number of copies.
    int copies = MAX_COPIES;
    if( data[0] > 0 ) {
        const double affordableExtraCopies = memoryLimit / data[0];
        if( affordableExtraCopies < MAX_COPIES - 1 )
            copies = static_cast< int >( affordableExtraCopies ) + 1;
    }

    for( int i = 1; i < copies; i++ ) {
        meshes[i] = mesh;
        check = getSelfPhysicalMemory();
        data[i] = check - prevCheck;
        prevCheck = check;
    }

    MemoryBenchmarkResult result;

    // unused slots of `data` are still zero here, so the sum covers exactly
    // the measured copies
    const double mean = TNL::sum( data ) / static_cast< double >( copies );
    result.memory = mean / 1024.0 / 1024.0;  // MiB

    if( copies > 1 ) {
        // fill the unused slots with the mean so that they contribute nothing
        // to the norm of the deviations below
        for( int i = copies; i < MAX_COPIES; i++ ) {
            data[i] = mean;
        }

        const double stddev = 1.0 / std::sqrt( copies ) * TNL::l2Norm( data - mean );
        result.memstddev = stddev / 1024.0 / 1024.0;  // MiB
    }

    return result;
}
+0 −386
Original line number Diff line number Diff line
// Implemented by: Ján Bobot, Jakub Klinkovský

#pragma once

#include <TNL/Meshes/Mesh.h>
#include <TNL/Meshes/Geometry/getEntityMeasure.h>
#include <TNL/Meshes/Geometry/getDecomposedMesh.h>
#include <TNL/Meshes/Geometry/getPlanarMesh.h>
#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/staticFor.h>
#include <TNL/Meshes/TypeResolver/resolveMeshType.h>
#include <TNL/Meshes/Readers/MeshReader.h>
#include <TNL/Meshes/Topologies/IsDynamicTopology.h>
#include <TNL/Benchmarks/Benchmarks.h>

#include "MeshConfigs.h"
#include "MemoryInfo.h"

using namespace TNL;
using namespace TNL::Meshes;
using namespace TNL::Meshes::Readers;
using namespace TNL::Benchmarks;

// Tells whether benchmarks for `Device` were requested by the user.
//
// The "devices" parameter selects everything when it is "all"; otherwise it
// has to be "host" for Devices::Host or "cuda" for Devices::Cuda.
template< typename Device >
bool checkDevice( const Config::ParameterContainer& parameters )
{
   const String selected = parameters.getParameter< String >( "devices" );
   const bool hostRequested = std::is_same< Device, Devices::Host >::value && selected == "host";
   const bool cudaRequested = std::is_same< Device, Devices::Cuda >::value && selected == "cuda";
   return selected == "all" || hostRequested || cudaRequested;
}

// Strips all namespace qualifiers from a type name, i.e. returns the part of
// `topology` after the last ':' (the whole string when there is no ':').
//
// Declared inline because this header defines the function: without it,
// including the header from several translation units produces multiple
// definitions at link time.
inline std::string removeNamespaces( const String & topology )
{
   // npos + 1 wraps around to 0, so a name without ':' is returned unchanged
   const std::size_t lastColon = topology.find_last_of( ':' );
   return topology.substr( lastColon + 1 );
}

// Collection of benchmarks executed for one mesh type.
//
// `run` loads the mesh from the file given by the "mesh-file" parameter and
// then executes all benchmarks through the nested *Dispatch helpers. The
// dispatchers use enable_if overloads to skip benchmarks that do not apply to
// the cell topology or space dimension of the mesh.
template< typename Mesh >
struct MeshBenchmarks
{
   static_assert( std::is_same< typename Mesh::DeviceType, Devices::Host >::value, "The mesh should be loaded on the host." );

   // Loads the mesh and runs all benchmarks on it.
   // Returns false when the mesh reader reports an error, true otherwise.
   static bool run( Benchmark<> & benchmark, const Config::ParameterContainer & parameters )
   {
      // only the mesh config is logged; the other columns are disabled
      Logging::MetadataColumns metadataColumns = {
         // {"mesh-file", meshFile},
         {"config", Mesh::Config::getConfigType()},
         //{"topology", removeNamespaces( getType< typename Mesh::Config::CellTopology >() ) },
         //{"space dim", std::to_string( Mesh::Config::spaceDimension )},
         //{"real", getType< typename Mesh::RealType >()},
         //{"gid_t", getType< typename Mesh::GlobalIndexType >()},
         //{"lid_t", getType< typename Mesh::LocalIndexType >()}
      };
      benchmark.setMetadataColumns( metadataColumns );

      const String & meshFile = parameters.getParameter< String >( "mesh-file" );
      // "auto" is passed as the file format argument of getMeshReader
      auto reader = getMeshReader( meshFile, "auto" );
      Mesh mesh;

      try {
         reader->loadMesh( mesh );
      }
      catch( const Meshes::Readers::MeshReaderError& e ) {
         std::cerr << "Failed to load mesh from file '" << meshFile << "'." << std::endl;
         return false;
      }

      dispatchTests( benchmark, parameters, mesh, reader );

      return true;
   }

   // Executes all benchmark groups in a fixed order. The reader-based groups
   // get the reader, the remaining ones get the loaded mesh.
   static void dispatchTests( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const Mesh & mesh, std::shared_ptr< MeshReader > reader )
   {
      ReaderDispatch::exec( benchmark, parameters, reader );
      InitDispatch::exec( benchmark, parameters, reader );
      DecompositionDispatch::exec( benchmark, parameters, mesh );
      PlanarDispatch::exec( benchmark, parameters, mesh );
      MeasuresDispatch::exec( benchmark, parameters, mesh );
      MemoryDispatch::exec( benchmark, parameters, mesh );
      CopyDispatch::exec( benchmark, parameters, mesh );
   }

   // "Reader" operation: timing of reader->detectMesh()
   struct ReaderDispatch
   {
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
      {
         benchmark.setOperation( String( "Reader" ) );
         benchmark_reader( benchmark, parameters, reader );
      }
   };

   // "Init" operation: timing of reader->loadMesh() into a fresh mesh
   struct InitDispatch
   {
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
      {
         benchmark.setOperation( String( "Init" ) );
         benchmark_init( benchmark, parameters, reader );
      }
   };

   // Decomposition benchmarks; exactly one of the three overloads is enabled
   // depending on the cell topology of the mesh.
   struct DecompositionDispatch
   {
      // Polygonal Mesh
      // (one benchmark per decomposer version: centroid "c" and point "p")
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value, bool > = true >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Decomposition (c)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (p)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }

      // Polyhedral Mesh
      // (all four combinations of the decomposer and sub-decomposer versions;
      // the two letters in the operation name follow the template arguments)
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool  > = true >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Decomposition (cc)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                  EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (cp)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                  EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (pc)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint,
                                  EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (pp)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint,
                                  EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }

      // Other than Polygonal and Polyhedral Mesh
      // (intentionally empty: no decomposition benchmark is run)
      template< typename M,
                std::enable_if_t< ! std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value &&
                                  ! std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool  > = true >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
      }
   };

   // Planar correction benchmarks; enabled only for polygonal or polyhedral
   // meshes with space dimension 3, a no-op otherwise.
   struct PlanarDispatch
   {
      // 3D polygonal or polyhedral mesh: one benchmark per decomposer version
      template< typename M,
                std::enable_if_t< M::Config::spaceDimension == 3 &&
                                 (std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value ||
                                  std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value ), bool > = true >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Planar Correction (c)" ) );
         benchmark_planar< EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Planar Correction (p)" ) );
         benchmark_planar< EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }

      // all remaining meshes (the negation of the condition above):
      // intentionally empty
      template< typename M,
                std::enable_if_t< M::Config::spaceDimension < 3 ||
                                 (! std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value &&
                                  ! std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value ), bool > = true >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
      }
   };

   // "Measures" operation: always on the host, additionally on CUDA when the
   // code is compiled with HAVE_CUDA.
   struct MeasuresDispatch
   {
      template< typename M >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh )
      {
         benchmark.setOperation( String("Measures") );
         benchmark_measures< Devices::Host >( benchmark, parameters, mesh );
         #ifdef HAVE_CUDA
         benchmark_measures< Devices::Cuda >( benchmark, parameters, mesh );
         #endif
      }
   };

   // "Memory" operation: memory footprint of the mesh
   struct MemoryDispatch
   {
      template< typename M >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer& parameters, const M& mesh_src )
      {
         benchmark.setOperation( String("Memory") );
         benchmark_memory( benchmark, parameters, mesh_src );
      }
   };

   // Host <-> CUDA mesh copy benchmarks; compiled only with HAVE_CUDA,
   // otherwise this dispatcher does nothing.
   struct CopyDispatch
   {
      template< typename M >
      static void exec( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         #ifdef HAVE_CUDA
         benchmark.setOperation( String("Copy CPU->GPU") );
         benchmark_copy< Devices::Host, Devices::Cuda >( benchmark, parameters, mesh_src );
         benchmark.setOperation( String("Copy GPU->CPU") );
         benchmark_copy< Devices::Cuda, Devices::Host >( benchmark, parameters, mesh_src );
         #endif
      }
   };

   // Times reader->detectMesh(); the reader is reset by the `reset` functor
   // passed to benchmark.time. Skipped when the host device is not selected.
   static void benchmark_reader( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
   {
      if( ! checkDevice< Devices::Host >( parameters ) )
         return;

      auto reset = [&]() {
         reader->reset();
      };

      auto benchmark_func = [&] () {
         reader->detectMesh();
      };

      benchmark.time< Devices::Host >( reset,
                                       "CPU",
                                       benchmark_func );
   }

   // Times the construction of a mesh from the reader (reader->loadMesh on a
   // default-constructed Mesh). detectMesh() is called in the `reset` functor,
   // so it is kept outside of the timed function. Skipped when the host
   // device is not selected.
   static void benchmark_init( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
   {
      if( ! checkDevice< Devices::Host >( parameters ) )
         return;

      auto reset = [&]() {
         reader->detectMesh();
      };

      auto benchmark_func = [&] () {
         Mesh mesh;
         reader->loadMesh( mesh );
      };

      benchmark.time< Devices::Host >( reset,
                                       "CPU",
                                       benchmark_func );
   }

   // Times decomposeMesh with the given decomposer versions on the host.
   // The return value of decomposeMesh is discarded after each call.
   template< EntityDecomposerVersion DecomposerVersion,
             EntityDecomposerVersion SubDecomposerVersion = EntityDecomposerVersion::ConnectEdgesToPoint,
             typename M >
   static void benchmark_decomposition( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      // skip benchmarks on devices which the user did not select
      if( ! checkDevice< Devices::Host >( parameters ) )
         return;

      auto benchmark_func = [&] () {
         auto meshBuilder = decomposeMesh< DecomposerVersion, SubDecomposerVersion >( mesh_src );
      };

      benchmark.time< Devices::Host >( "CPU",
                                       benchmark_func );
   }

   // Times planarCorrection with the given decomposer version on the host.
   // Available only for polygonal or polyhedral meshes with space dimension 3
   // (same condition as the enabled overload in PlanarDispatch).
   template< EntityDecomposerVersion DecomposerVersion,
             typename M,
             std::enable_if_t< M::Config::spaceDimension == 3 &&
                              (std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value ||
                               std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value ), bool > = true >
   static void benchmark_planar( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      if( ! checkDevice< Devices::Host >( parameters ) )
         return;

      auto benchmark_func = [&] () {
         auto meshBuilder = planarCorrection< DecomposerVersion >( mesh_src );
      };

      benchmark.time< Devices::Host >( "CPU",
                                       benchmark_func );
   }

   // Times the computation of the measure of every entity of the mesh
   // dimension (M::getMeshDimension()) on `Device`.
   template< typename Device,
             typename M >
   static void benchmark_measures( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      using Real = typename M::RealType;
      using Index = typename M::GlobalIndexType;
      using DeviceMesh = Meshes::Mesh< typename M::Config, Device >;

      // skip benchmarks on devices which the user did not select
      if( ! checkDevice< Device >( parameters ) )
         return;

      const Index entitiesCount = mesh_src.template getEntitiesCount< M::getMeshDimension() >();

      // copy of the source mesh for the benchmarked device and the output
      // array with one value per entity
      const DeviceMesh mesh = mesh_src;
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      Containers::Array< Real, Device, Index > measures;
      measures.setSize( entitiesCount );

      // stores the measure of the i-th entity into array[ i ]
      auto kernel_measures = [] __cuda_callable__
         ( Index i,
           const DeviceMesh* mesh,
           Real* array )
      {
         const auto& entity = mesh->template getEntity< M::getMeshDimension() >( i );
         array[ i ] = getEntityMeasure( *mesh, entity );
      };

      auto reset = [&]() {
         measures.setValue( 0.0 );
      };

      // runs the kernel for all indices in [0, entitiesCount)
      auto benchmark_func = [&] () {
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_measures,
               &meshPointer.template getData< Device >(),
               measures.getData() );
      };

      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }

   // Measures the memory footprint of the mesh with testMemoryUsage and hands
   // the result to benchmark.time together with a no-op function.
   // NOTE(review): presumably the no-op is timed only so that the memory
   // columns of the result get logged - confirm against Benchmark::time.
   template< typename M >
   static void benchmark_memory( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      if( ! checkDevice< Devices::Host >( parameters ) )
            return;

      MemoryBenchmarkResult memResult = testMemoryUsage( parameters, mesh_src );
      auto noop = [](){};
      benchmark.time< TNL::Devices::Host >( "CPU", noop, memResult );
   }

   // Times the copy-construction of a mesh on DeviceTo from a mesh on
   // DeviceFrom.
   template< typename DeviceFrom,
             typename DeviceTo,
             typename M >
   static void benchmark_copy( Benchmark<> & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      using MeshFrom = Meshes::Mesh< typename M::Config, DeviceFrom >;
      using MeshTo = Meshes::Mesh< typename M::Config, DeviceTo >;
      // the benchmark counts as a host benchmark only when both sides are on
      // the host; any copy involving CUDA is attributed to Devices::Cuda
      using Device = typename std::conditional_t< std::is_same< DeviceFrom, Devices::Host >::value &&
                                                  std::is_same< DeviceTo, Devices::Host >::value,
                                                  Devices::Host,
                                                  Devices::Cuda >;

      // skip benchmarks on devices which the user did not select
      if( ! checkDevice< Device >( parameters ) )
         return;

      // the source mesh is created once, outside of the timed function
      const MeshFrom meshFrom = mesh_src;

      auto benchmark_func = [&] () {
         MeshTo meshTo = meshFrom;
      };

      // the first argument is an empty reset functor
      benchmark.time< Device >( [] () {},
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }
};

template< template< typename, int, typename, typename, typename > class ConfigTemplate,
          typename CellTopology,
          int SpaceDimension,
          typename Real,
          typename GlobalIndex,
          typename LocalIndex >
struct MeshBenchmarksRunner
{
    static bool
    run( Benchmark<> & benchmark,
         const Config::ParameterContainer & parameters )
   {
      using Config = ConfigTemplate< CellTopology, SpaceDimension, Real, GlobalIndex, LocalIndex >;
      using MeshType = Mesh< Config, Devices::Host >;
      return MeshBenchmarks< MeshType >::run( benchmark, parameters );
   }
};

src/Benchmarks/Mesh/MeshConfigs.h

deleted100644 → 0
+0 −128

File deleted.

Preview size limit exceeded, changes collapsed.

Loading