Commit d6396236 authored by Ján Bobot's avatar Ján Bobot Committed by Jakub Klinkovský
Browse files

Fix for FPMA reader and added benchmark for polygonal and polyhedral mesh

parent 551161cd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -7,3 +7,4 @@ add_subdirectory( LinearSolvers )
add_subdirectory( ODESolvers )
add_subdirectory( Sorting )
add_subdirectory( Traversers )
add_subdirectory( Mesh )
+8 −0
Original line number Diff line number Diff line
# Build and install the CUDA variant of the mesh benchmark only when
# BUILD_CUDA is set.
if( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE( tnl-benchmark-mesh-cuda tnl-benchmark-mesh.cu )
   install( TARGETS tnl-benchmark-mesh-cuda RUNTIME DESTINATION bin )
endif()

# The host variant is built unconditionally from the .cpp source.
ADD_EXECUTABLE( tnl-benchmark-mesh tnl-benchmark-mesh.cpp )

# Install the host executable into the "bin" directory.
install( TARGETS tnl-benchmark-mesh RUNTIME DESTINATION bin )
+195 −0
Original line number Diff line number Diff line
#pragma once

// References:
// - https://stackoverflow.com/a/64166/4180822
// - https://lemire.me/blog/2020/03/03/calling-free-or-delete/
// - https://stackoverflow.com/questions/15529643/what-does-malloc-trim0-really-mean

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <sys/types.h>
#include <sys/sysinfo.h>
#include <malloc.h>

// Total virtual memory of the machine (RAM + swap) in bytes, computed from
// the values filled in by sysinfo().
inline long
getTotalVirtualMemory()
{
    struct sysinfo info;
    sysinfo( &info );

    // totalram and totalswap are expressed in multiples of mem_unit; add them
    // first and scale the signed sum to bytes afterwards.
    const long units = static_cast< long >( info.totalram + info.totalswap );
    return units * info.mem_unit;
}

// Virtual memory currently in use on the machine (used RAM + used swap) in
// bytes, computed from the values filled in by sysinfo().
inline long
getUsedVirtualMemory()
{
    struct sysinfo info;
    sysinfo( &info );

    // "used" is total minus free for both RAM and swap; the counts are in
    // multiples of mem_unit and are scaled to bytes after the summation.
    const long units = static_cast< long >( ( info.totalram - info.freeram )
                                            + ( info.totalswap - info.freeswap ) );
    return units * info.mem_unit;
}

// Extracts the first decimal number found in a line such as
// "VmRSS:     1234 kB" (the example yields 1234).
//
// The scan never runs past the terminating '\0'. If `line` is a null pointer
// or contains no digit at all, 0 is returned. The buffer is not modified.
inline long
parseLine(char* line)
{
    if (line == NULL)
        return 0;

    // skip everything in front of the first digit, but stop at the end of the string
    const char* p = line;
    while (*p != '\0' && (*p < '0' || *p > '9'))
        p++;

    // strtol stops at the first non-digit character (e.g. the " kB" suffix),
    // so the line does not have to be truncated beforehand. Unlike atol, its
    // behaviour is defined even when the value does not fit into long.
    return strtol(p, NULL, 10);
}

// virtual memory currently used by the calling process
//
// Reads the "VmSize:" entry of /proc/self/status and returns it converted
// from kB to bytes. Returns -1 when the file cannot be opened or when it
// contains no "VmSize:" line.
inline long
getSelfVirtualMemory()
{
    // explicitly release unused memory
    malloc_trim(0);

    FILE* file = fopen("/proc/self/status", "r");
    // Without this check fgets/fclose would be called on a null stream
    // whenever the file is not available.
    if (file == NULL)
        return -1;

    long result = -1;
    char line[128];

    while (fgets(line, sizeof(line), file) != NULL){
        if (strncmp(line, "VmSize:", 7) == 0){
            // convert from kB to B
            result = parseLine(line) * 1024;
            break;
        }
    }
    fclose(file);
    return result;
}

// Total amount of RAM installed in the machine in bytes, computed from the
// values filled in by sysinfo().
inline long
getTotalPhysicalMemory()
{
    struct sysinfo info;
    sysinfo( &info );

    // totalram is a count of mem_unit-sized units; convert it to a signed
    // value first and only then scale it to bytes.
    return static_cast< long >( info.totalram ) * info.mem_unit;
}

// Amount of RAM currently in use on the machine (total minus free) in bytes,
// computed from the values filled in by sysinfo().
inline long
getUsedPhysicalMemory()
{
    struct sysinfo info;
    sysinfo( &info );

    // The difference is a count of mem_unit-sized units; convert it to a
    // signed value first and only then scale it to bytes.
    return static_cast< long >( info.totalram - info.freeram ) * info.mem_unit;
}

// physical memory (resident set) currently used by the calling process
//
// Reads the "VmRSS:" entry of /proc/self/status and returns it converted
// from kB to bytes. Returns -1 when the file cannot be opened or when it
// contains no "VmRSS:" line.
inline long
getSelfPhysicalMemory()
{
    // explicitly release unused memory
    malloc_trim(0);

    FILE* file = fopen("/proc/self/status", "r");
    // Without this check fgets/fclose would be called on a null stream
    // whenever the file is not available.
    if (file == NULL)
        return -1;

    long result = -1;
    char line[128];

    while (fgets(line, sizeof(line), file) != NULL){
        if (strncmp(line, "VmRSS:", 6) == 0){
            // convert from kB to B
            result = parseLine(line) * 1024;
            break;
        }
    }
    fclose(file);
    return result;
}


#include <Benchmarks/Benchmarks.h>
#include <TNL/Config/ParameterContainer.h>
#include <TNL/Containers/StaticVector.h>

// Benchmark result that reports the columns "time", "stddev", "stddev/time",
// "bandwidth" and "speedup" followed by three memory-related columns.
struct MemoryBenchmarkResult
: public TNL::Benchmarks::BenchmarkResult
{
   using HeaderElements = TNL::Benchmarks::Logging::HeaderElements;
   using RowElements = TNL::Benchmarks::Logging::RowElements;

   // measured memory and its standard deviation; both stay NaN until set
   double memory = std::numeric_limits<double>::quiet_NaN();
   double memstddev = std::numeric_limits<double>::quiet_NaN();

   // Column titles, in the same order as the values from getRowElements().
   virtual HeaderElements getTableHeader() const override
   {
      return HeaderElements({
         "time", "stddev", "stddev/time", "bandwidth", "speedup",
         "memory", "memstddev", "memstddev/memory"
      });
   }

   // Values of one table row.
   virtual RowElements getRowElements() const override
   {
      RowElements row;
      row << time << stddev << stddev / time << bandwidth;
      // a zero speedup is reported as not available
      if( speedup == 0 )
         row << "N/A";
      else
         row << speedup;
      row << memory << memstddev << memstddev / memory;
      return row;
   }
};

// Estimates how much physical memory one copy of `mesh` occupies.
//
// The mesh is copied up to MAX_COPIES times and the growth of the value
// returned by getSelfPhysicalMemory() is sampled after each copy. The size
// of the first copy together with the "mem-limit" parameter (scaled by
// 1024 * 1024, i.e. interpreted as MiB) determines how many copies are made.
//
// Returns a MemoryBenchmarkResult whose `memory` is the mean growth per copy
// in MiB and whose `memstddev` is the standard deviation of the samples in
// MiB; `memstddev` keeps its default value when only one copy was made.
template< long MAX_COPIES = 10, typename Mesh >
MemoryBenchmarkResult
testMemoryUsage( const TNL::Config::ParameterContainer& parameters,
                 const Mesh& mesh )
{
    const size_t memoryLimit = parameters.getParameter< size_t >( "mem-limit" ) * 1024 * 1024;
    TNL::Containers::StaticVector< MAX_COPIES, Mesh > meshes;
    TNL::Containers::StaticVector< MAX_COPIES, double > data;
    // unused slots must be zero so that they do not contribute to the sum below
    data.setValue( 0 );

    long prevCheck = getSelfPhysicalMemory();
    meshes[0] = mesh;
    long check = getSelfPhysicalMemory();
    data[0] = check - prevCheck;
    prevCheck = check;

    // Number of copies: one plus as many further copies as fit into the
    // memory limit, capped at MAX_COPIES. The first sample may be zero or
    // even negative (the measurement trims the heap before reading the
    // counter); dividing by it would produce an infinite or negative count,
    // so in that case the limit is not applied and MAX_COPIES is used.
    int copies = MAX_COPIES;
    if( data[0] > 0 ) {
        const double extraCopies = memoryLimit / data[0];
        if( extraCopies < MAX_COPIES - 1 )
            copies = static_cast< int >( extraCopies ) + 1;
    }

    for( int i = 1; i < copies; i++ ) {
        meshes[i] = mesh;
        check = getSelfPhysicalMemory();
        data[i] = check - prevCheck;
        prevCheck = check;
    }

    MemoryBenchmarkResult result;

    const double mean = TNL::sum( data ) / (double) copies;
    result.memory = mean / 1024.0 / 1024.0;  // MiB

    if( copies > 1 ) {
        // fill the unused slots with the mean so that they contribute zero
        // to the norm of (data - mean)
        for( int i = copies; i < MAX_COPIES; i++ ) {
            data[i] = mean;
        }

        const double stddev = 1.0 / std::sqrt( copies ) * TNL::l2Norm( data - mean );
        result.memstddev = stddev / 1024.0 / 1024.0;  // MiB
    }

    return result;
}
+450 −0
Original line number Diff line number Diff line
/***************************************************************************
                          MeshBenchmarks.h  -  description
                             -------------------
    begin                : Nov 21, 2017
    copyright            : (C) 2017 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Jakub Klinkovsky

#pragma once

#include <TNL/Meshes/Mesh.h>
#include <TNL/Meshes/Geometry/getEntityMeasure.h>
#include <TNL/Meshes/Geometry/getDecomposedMesh.h>
#include <TNL/Meshes/Geometry/getPlanarMesh.h>
#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/staticFor.h>
#include <TNL/Meshes/TypeResolver/resolveMeshType.h>
#include <TNL/Meshes/Readers/MeshReader.h>
#include <TNL/Meshes/Topologies/IsDynamicTopology.h>
#include <Benchmarks/Benchmarks.h>

#include "MeshConfigs.h"
#include "MemoryInfo.h"

using namespace TNL;
using namespace TNL::Meshes;
using namespace TNL::Meshes::Readers;
using namespace TNL::Benchmarks;

// Tells whether benchmarks for the given Device were requested through the
// "devices" parameter: "all" selects every device, "host" selects
// Devices::Host and "cuda" selects Devices::Cuda.
template< typename Device >
bool checkDevice( const Config::ParameterContainer& parameters )
{
   const String device = parameters.getParameter< String >( "devices" );

   const bool hostRequested = std::is_same< Device, Devices::Host >::value && device == "host";
   const bool cudaRequested = std::is_same< Device, Devices::Cuda >::value && device == "cuda";

   return device == "all" || hostRequested || cudaRequested;
}

// Returns the part of `topology` that follows its last ':' character, i.e.
// the type name without namespace qualifiers ("A::B::C" yields "C").
// A name without any ':' is returned unchanged.
//
// Declared inline because the function is defined in a header and must not
// produce multiple definitions when the header is included from several
// translation units.
inline std::string removeNamespaces( const String & topology )
{
  // Only the position of the last ':' matters; when there is none, the
  // result is npos and npos + 1 wraps around to 0, selecting the whole string.
  const std::size_t lastColon = topology.find_last_of( ':' );
  return topology.substr( lastColon + 1 );
}

template< typename Mesh >
struct MeshBenchmarks
{
   static_assert( std::is_same< typename Mesh::DeviceType, Devices::Host >::value, "The mesh should be loaded on the host." );

   // Loads the mesh named by the "mesh-file" parameter and runs all
   // benchmarks on it.
   //
   // Returns false when no reader could be created for the file or when
   // loading the mesh fails with a MeshReaderError, true otherwise.
   static bool run( Benchmark & benchmark, const Config::ParameterContainer & parameters )
   {
      Benchmark::MetadataColumns metadataColumns = {
         // {"mesh-file", meshFile},
         {"config", Mesh::Config::getConfigType()},
         {"topology", removeNamespaces( getType< typename Mesh::Config::CellTopology >() ) },
         {"space dim", std::to_string( Mesh::Config::spaceDimension )},
         {"real", getType< typename Mesh::RealType >()},
         {"gid_t", getType< typename Mesh::GlobalIndexType >()},
         {"lid_t", getType< typename Mesh::LocalIndexType >()}
      };

      const String & meshFile = parameters.getParameter< String >( "mesh-file" );
      auto reader = getMeshReader( meshFile, "auto" );
      // NOTE(review): getMeshReader presumably returns a null pointer when the
      // file format is not recognized - confirm. The reader is dereferenced
      // below, so a null pointer must not get past this point.
      if( ! reader ) {
         std::cerr << "Failed to create a mesh reader for file '" << meshFile << "'." << std::endl;
         return false;
      }

      Mesh mesh;

      try {
         reader->loadMesh( mesh );
      }
      catch( const Meshes::Readers::MeshReaderError& e ) {
         std::cerr << "Failed to load mesh from file '" << meshFile << "'." << std::endl;
         return false;
      }

      // the metadata columns are set only after the mesh was loaded successfully
      benchmark.setMetadataColumns( metadataColumns );
      dispatchTests( benchmark, parameters, mesh, reader );

      return true;
   }

   // Runs all benchmark groups in a fixed order: reader, init, decomposition,
   // planar correction, measures and memory. The reader and init groups work
   // with `reader`, the remaining groups with the already loaded `mesh`.
   static void dispatchTests( Benchmark & benchmark, const Config::ParameterContainer & parameters, const Mesh & mesh, std::shared_ptr< MeshReader > reader )
   {
      ReaderDispatch::exec( benchmark, parameters, reader );
      InitDispatch::exec( benchmark, parameters, reader );
      DecompositionDispatch::exec( benchmark, parameters, mesh );
      PlanarDispatch::exec( benchmark, parameters, mesh );
      MeasuresDispatch::exec( benchmark, parameters, mesh );
      MemoryDispatch::exec( benchmark, parameters, mesh );
   }

   // Labels the benchmark operation as "Reader" and delegates to benchmark_reader.
   struct ReaderDispatch
   {
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
      {
         benchmark.setOperation( String( "Reader" ) );
         benchmark_reader( benchmark, parameters, reader );
      }
   };

   // Labels the benchmark operation as "Init" and delegates to benchmark_init.
   struct InitDispatch
   {
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
      {
         benchmark.setOperation( String( "Init" ) );
         benchmark_init( benchmark, parameters, reader );
      }
   };

   // Runs the decomposition benchmarks. The overload of exec is selected by
   // the cell topology of the mesh. The letters in the operation labels name
   // the decomposer versions used: "c" stands for ConnectEdgesToCentroid and
   // "p" for ConnectEdgesToPoint.
   struct DecompositionDispatch
   {
      // Polygonal Mesh
      // One decomposer version is selected, so there are two variants.
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value, bool > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Decomposition (c)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (p)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }

      // Polyhedral Mesh
      // Two decomposer versions are selected, so all four combinations are
      // benchmarked; the first letter of the label corresponds to the first
      // template argument, the second letter to the second one.
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool  > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Decomposition (cc)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                  EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (cp)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                  EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (pc)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint,
                                  EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Decomposition (pp)" ) );
         benchmark_decomposition< EntityDecomposerVersion::ConnectEdgesToPoint,
                                  EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }
   };

   // Runs the planar correction benchmarks. The overload of exec is selected
   // by the space dimension of the mesh configuration.
   struct PlanarDispatch
   {
      // Space dimension 3: benchmark getPlanarMesh with both decomposer
      // versions ("c" = ConnectEdgesToCentroid, "p" = ConnectEdgesToPoint).
      template< typename M,
                std::enable_if_t< M::Config::spaceDimension == 3, bool > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
         benchmark.setOperation( String( "Planar Correction (c)" ) );
         benchmark_planar< EntityDecomposerVersion::ConnectEdgesToCentroid >( benchmark, parameters, mesh_src );

         benchmark.setOperation( String( "Planar Correction (p)" ) );
         benchmark_planar< EntityDecomposerVersion::ConnectEdgesToPoint >( benchmark, parameters, mesh_src );
      }

      // Space dimension below 3: nothing is benchmarked.
      template< typename M,
                std::enable_if_t< M::Config::spaceDimension < 3, bool > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
      {
      }
   };

   // Runs the entity measure benchmarks on the given mesh and on every
   // decomposed variant of it. The overload of exec is selected by the cell
   // topology. The letters in the operation labels name the decomposer
   // versions used: "c" stands for ConnectEdgesToCentroid and "p" for
   // ConnectEdgesToPoint.
   struct MeasuresDispatch
   {
      // Polygonal Mesh
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value, bool > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh )
      {
         benchmark.setOperation( String("Measures") );
         exec_helper( benchmark, parameters, mesh );

         // each decomposed mesh lives only inside its own scope
         {
            benchmark.setOperation( String("Measures (decomp (c))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Measures (decomp (p))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }
      }

      // Polyhedral Mesh
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh )
      {
         benchmark.setOperation( String("Measures") );
         exec_helper( benchmark, parameters, mesh );

         // each decomposed mesh lives only inside its own scope
         {
            benchmark.setOperation( String("Measures (decomp (cc))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                                           EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Measures (decomp (cp))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                                           EntityDecomposerVersion::ConnectEdgesToPoint >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Measures (decomp (pc))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint,
                                                           EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Measures (decomp (pp))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint,
                                                           EntityDecomposerVersion::ConnectEdgesToPoint >( mesh );
            exec_helper( benchmark, parameters, decomposedMesh );
         }
      }
   private:
      // Runs benchmark_measures for Devices::Host and, when HAVE_CUDA is
      // defined, also for Devices::Cuda.
      template< typename M >
      static void exec_helper( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh )
      {
         benchmark_measures< Devices::Host >( benchmark, parameters, mesh );
#ifdef HAVE_CUDA
         benchmark_measures< Devices::Cuda >( benchmark, parameters, mesh );
#endif
      }
   };

   // Runs the memory usage benchmark on the given mesh and on every
   // decomposed variant of it. The overload of exec is selected by the cell
   // topology. The letters in the operation labels name the decomposer
   // versions used: "c" stands for ConnectEdgesToCentroid and "p" for
   // ConnectEdgesToPoint.
   struct MemoryDispatch
   {
      // Polygonal Mesh
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value, bool > = true >
      static void exec( Benchmark& benchmark, const Config::ParameterContainer& parameters, const M& mesh_src )
      {
         benchmark.setOperation( String("Memory") );
         benchmark_memory( benchmark, parameters, mesh_src );

         // each decomposed mesh lives only inside its own scope
         {
            benchmark.setOperation( String("Memory (decomp (c))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Memory (decomp (p))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }
      }

      // Polyhedral Mesh
      template< typename M,
                std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool  > = true >
      static void exec( Benchmark & benchmark, const Config::ParameterContainer& parameters, const M& mesh_src )
      {
         benchmark.setOperation( String("Memory") );
         benchmark_memory( benchmark, parameters, mesh_src );

         // each decomposed mesh lives only inside its own scope
         {
            benchmark.setOperation( String("Memory (decomp (cc))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                                           EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }


         {
            benchmark.setOperation( String("Memory (decomp (cp))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToCentroid,
                                                           EntityDecomposerVersion::ConnectEdgesToPoint >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Memory (decomp (pc))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint,
                                                           EntityDecomposerVersion::ConnectEdgesToCentroid >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }

         {
            benchmark.setOperation( String("Memory (decomp (pp))") );
            const auto decomposedMesh = getDecomposedMesh< EntityDecomposerVersion::ConnectEdgesToPoint,
                                                           EntityDecomposerVersion::ConnectEdgesToPoint >( mesh_src );
            benchmark_memory( benchmark, parameters, decomposedMesh );
         }
      }
   };

   // Times reader->detectMesh() on the host under the performer label "CPU".
   // A callback that calls reader->reset() is passed to Benchmark::time as
   // the reset function.
   static void benchmark_reader( Benchmark & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
   {
      // reset callback
      auto resetReader = [&reader]() {
         reader->reset();
      };

      // timed operation
      auto detect = [&reader]() {
         reader->detectMesh();
      };

      benchmark.time< Devices::Host >( resetReader, "CPU", detect );
   }

   // Times the loading of a freshly constructed Mesh through the reader on
   // the host under the performer label "CPU". A callback that calls
   // reader->detectMesh() is passed to Benchmark::time as the reset function.
   static void benchmark_init( Benchmark & benchmark, const Config::ParameterContainer & parameters, std::shared_ptr< MeshReader > reader )
   {
      // reset callback
      auto detect = [&reader]() {
         reader->detectMesh();
      };

      // timed operation: the mesh object is created and destroyed inside
      auto load = [&reader]() {
         Mesh loadedMesh;
         reader->loadMesh( loadedMesh );
      };

      benchmark.time< Devices::Host >( detect, "CPU", load );
   }

   // benchmark_decomposition (Polygonal Mesh)
   // Times getDecomposedMesh< DecomposerVersion >( mesh_src ) on the host
   // under the performer label "CPU".
   template< EntityDecomposerVersion DecomposerVersion,
             typename M,
             std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polygon >::value, bool > = true >
   static void benchmark_decomposition( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      // timed operation: the result is discarded when the lambda returns
      auto decompose = [&mesh_src]() {
         const auto result = getDecomposedMesh< DecomposerVersion >( mesh_src );
      };

      benchmark.time< Devices::Host >( "CPU", decompose );
   }

   // benchmark_decomposition (Polyhedral Mesh)
   // Times getDecomposedMesh< DecomposerVersion, SubDecomposerVersion >( mesh_src )
   // on the host under the performer label "CPU".
   template< EntityDecomposerVersion DecomposerVersion,
             EntityDecomposerVersion SubDecomposerVersion,
             typename M,
             std::enable_if_t< std::is_same< typename M::Config::CellTopology, Topologies::Polyhedron >::value, bool > = true >
   static void benchmark_decomposition( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      // timed operation: the result is discarded when the lambda returns
      auto decompose = [&mesh_src]() {
         const auto result = getDecomposedMesh< DecomposerVersion, SubDecomposerVersion >( mesh_src );
      };

      benchmark.time< Devices::Host >( "CPU", decompose );
   }

   // Times getPlanarMesh< DecomposerVersion >( mesh_src ) on the host under
   // the performer label "CPU". Available only for meshes whose
   // configuration has space dimension 3.
   template< EntityDecomposerVersion DecomposerVersion,
             typename M,
             std::enable_if_t< M::Config::spaceDimension == 3, bool > = true >
   static void benchmark_planar( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      // timed operation: the result is discarded when the lambda returns
      auto makePlanar = [&mesh_src]() {
         const auto result = getPlanarMesh< DecomposerVersion >( mesh_src );
      };

      benchmark.time< Devices::Host >( "CPU", makePlanar );
   }

   // Times the computation of the measures of all entities of dimension
   // M::getMeshDimension() on the given Device. The performer label is "CPU"
   // for Devices::Host and "GPU" for any other device. Nothing is done when
   // checkDevice reports that the device was not selected.
   template< typename Device,
             typename M >
   static void benchmark_measures( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      using Real = typename M::RealType;
      using Index = typename M::GlobalIndexType;
      using DeviceMesh = Meshes::Mesh< typename M::Config, Device >;

      // skip benchmarks on devices which the user did not select
      if( ! checkDevice< Device >( parameters ) )
         return;

      const Index entitiesCount = mesh_src.template getEntitiesCount< M::getMeshDimension() >();

      // create a mesh of the device-specific type from the source mesh
      const DeviceMesh mesh = mesh_src;
      // NOTE(review): the DevicePointer presumably makes the mesh accessible
      // from code running on Device - confirm
      Pointers::DevicePointer< const DeviceMesh > meshPointer( mesh );
      // output array with one value per entity
      Containers::Array< Real, Device, Index > measures;
      measures.setSize( entitiesCount );

      // stores the measure of the i-th entity into array[ i ]
      auto kernel_measures = [] __cuda_callable__
         ( Index i,
           const DeviceMesh* mesh,
           Real* array )
      {
         const auto& entity = mesh->template getEntity< M::getMeshDimension() >( i );
         array[ i ] = getEntityMeasure( *mesh, entity );
      };

      // passed to Benchmark::time as the reset function: zeroes the output array
      auto reset = [&]() {
         measures.setValue( 0.0 );
      };

      // timed operation: run the kernel for all indices in [0, entitiesCount)
      auto benchmark_func = [&] () {
         Algorithms::ParallelFor< Device >::exec(
               (Index) 0, entitiesCount,
               kernel_measures,
               &meshPointer.template getData< Device >(),
               measures.getData() );
      };

      benchmark.time< Device >( reset,
                                (std::is_same< Device, Devices::Host >::value) ? "CPU" : "GPU",
                                benchmark_func );
   }

   // Measures the memory usage of mesh_src with testMemoryUsage and hands
   // the result to Benchmark::time together with an empty compute function
   // under the performer label "CPU".
   template< typename M >
   static void benchmark_memory( Benchmark & benchmark, const Config::ParameterContainer & parameters, const M & mesh_src )
   {
      // the timed function does nothing
      auto doNothing = [](){};
      MemoryBenchmarkResult usage = testMemoryUsage( parameters, mesh_src );
      benchmark.time< Devices::Host >( "CPU", doNothing, usage );
   }
};

template< template< typename, int, typename, typename, typename > class ConfigTemplate,
          typename CellTopology,
          int SpaceDimension,
          typename Real,
          typename GlobalIndex,
          typename LocalIndex >
struct MeshBenchmarksRunner
{
    static bool
    run( Benchmark & benchmark,
         Benchmark::MetadataMap metadata,
         const Config::ParameterContainer & parameters )
   {
      using Config = ConfigTemplate< CellTopology, SpaceDimension, Real, GlobalIndex, LocalIndex >;
      using MeshType = Mesh< Config, Devices::Host >;
      return MeshBenchmarks< MeshType >::run( benchmark, parameters );
   }
};
+138 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading