Commit be5054f8 authored by Jakub Klinkovský
Browse files

Benchmarks: performed loops are returned via the BenchmarkResults struct,...

Benchmarks: performed loops are returned via the BenchmarkResults struct, base-time via the getBaseTime method
parent c0af5a39
Loading
Loading
Loading
Loading
+2 −5
Original line number Diff line number Diff line
@@ -116,10 +116,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark,
      hostArray = hostArray2;
   };
   benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
   // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will
   // complain when compiling without CUDA
   const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost );
   (void)copyBasetime;  // ignore unused variable
   benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost );
#ifdef HAVE_CUDA
   auto copyAssignCudaCuda = [&]() {
      deviceArray = deviceArray2;
@@ -135,7 +132,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark,
   auto copyAssignCudaHost = [&]() {
      hostArray = deviceArray;
   };
   benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime );
   benchmark.setOperation( "copy (operator=)", datasetSize, benchmark.getBaseTime() );
   benchmark.time< Devices::Cuda >( reset1, "CPU->GPU", copyAssignHostCuda );
   benchmark.time< Devices::Cuda >( reset1, "GPU->CPU", copyAssignCudaHost );
#endif
+23 −30
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ struct BenchmarkResult
   using HeaderElements = typename Logging::HeaderElements;
   using RowElements = typename Logging::RowElements;

   int loops = 0;
   double time = std::numeric_limits<double>::quiet_NaN();
   double stddev = std::numeric_limits<double>::quiet_NaN();
   double bandwidth = std::numeric_limits<double>::quiet_NaN();
@@ -120,12 +121,10 @@ class Benchmark
      // Times a single ComputeFunction. Subsequent calls implicitly split
      // the current operation into sub-columns identified by "performer",
      // which are further split into "bandwidth", "time" and "speedup" columns.
      // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation)
      // Also terminates the recursion of the following variadic template.
      template< typename Device,
                typename ResetFunction,
                typename ComputeFunction >
      double time( ResetFunction reset,
      void time( ResetFunction reset,
                 const String & performer,
                 ComputeFunction & compute,
                 BenchmarkResult & result );
@@ -133,26 +132,20 @@ class Benchmark
      template< typename Device,
                typename ResetFunction,
                typename ComputeFunction >
      inline double time( ResetFunction reset,
      BenchmarkResult time( ResetFunction reset,
                            const String & performer,
                            ComputeFunction & compute );
      /*{
         BenchmarkResult result;
         return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
      }*/

      /****
       * The same methods as above but without reset function
       */

      // The same methods as above but without the reset function
      template< typename Device,
                typename ComputeFunction >
      double time( const String & performer,
      void time( const String & performer,
                 ComputeFunction & compute,
                 BenchmarkResult & result );

      template< typename Device,
                typename ComputeFunction >
      inline double time( const String & performer,
      BenchmarkResult time( const String & performer,
                            ComputeFunction & compute );

      // Adds an error message to the log. Should be called in places where the
@@ -163,14 +156,14 @@ class Benchmark

      SolverMonitorType& getMonitor();

      int getPerformedLoops() const;
      double getBaseTime() const;

      bool isResetingOn() const;

   protected:
      Logger logger;

      int loops = 1, performedLoops = 0;
      int loops = 1;

      double minTime = 0.0;

+14 −15
Original line number Diff line number Diff line
@@ -137,7 +137,7 @@ template< typename Logger >
   template< typename Device,
             typename ResetFunction,
             typename ComputeFunction >
double
void
Benchmark< Logger >::
time( ResetFunction reset,
      const String & performer,
@@ -157,12 +157,11 @@ time( ResetFunction reset,
   std::string errorMessage;
   try {
      if( this->reset )
         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
      else {
         auto noReset = [] () {};
         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
      }
      this->performedLoops = functionTimer.getPerformedLoops();
   }
   catch ( const std::exception& e ) {
      errorMessage = "timeFunction failed due to a C++ exception with description: " + std::string(e.what());
@@ -175,47 +174,47 @@ time( ResetFunction reset,
      this->baseTime = result.time;

   logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints(), errorMessage );

   return this->baseTime;
}

// Convenience overload of time() that takes a reset function but no
// caller-supplied result object: a local BenchmarkResult is created, filled
// by the overload taking `BenchmarkResult & result`, and returned by value.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// return-type lines and two variants of the delegating call appear together;
// presumably `inline double` and `return time< Device >( ... )` are the
// removed lines - confirm against the repository.
template< typename Logger >
   template< typename Device,
             typename ResetFunction,
             typename ComputeFunction >
inline double
BenchmarkResult
Benchmark< Logger >::
time( ResetFunction reset,
      const String& performer,
      ComputeFunction& compute )
{
   BenchmarkResult result;
   return time< Device >( reset, performer, compute, result );
   time< Device >( reset, performer, compute, result );
   return result;
}

// Overload of time() without a reset function: an empty lambda is passed as
// the reset callback and the call is forwarded to the overload that accepts
// one. The measurement is written into the caller-supplied `result`.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// return-type lines and two variants of the forwarding call appear together;
// presumably `double` and the `return time< Device >( ... )` line are the
// removed ones - confirm against the repository.
template< typename Logger >
   template< typename Device,
             typename ComputeFunction >
double
void
Benchmark< Logger >::
time( const String & performer,
      ComputeFunction & compute,
      BenchmarkResult & result )
{
   auto noReset = [] () {};
   return time< Device >( noReset, performer, compute, result );
   time< Device >( noReset, performer, compute, result );
}

// Shortest overload of time(): neither a reset function nor a result object
// is supplied. A local BenchmarkResult is created, filled by the
// ( performer, compute, result ) overload, and returned by value.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// return-type lines and two variants of the delegating call appear together;
// presumably `inline double` and `return time< Device >( ... )` are the
// removed lines - confirm against the repository.
template< typename Logger >
   template< typename Device,
             typename ComputeFunction >
inline double
BenchmarkResult
Benchmark< Logger >::
time( const String & performer,
      ComputeFunction & compute )
{
   BenchmarkResult result;
   return time< Device >( performer, compute, result );
   time< Device >( performer, compute, result );
   return result;
}

template< typename Logger >
@@ -244,11 +243,11 @@ getMonitor() -> SolverMonitorType&
}

// Const accessor defined out of line for Benchmark< Logger >.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// variants of the accessor are interleaved: an `int getPerformedLoops()`
// returning `this->performedLoops` and a `double getBaseTime()` returning
// `baseTime`. Presumably the getBaseTime variant is the one that remains
// after this commit - confirm against the repository.
template< typename Logger >
int
double
Benchmark< Logger >::
getPerformedLoops() const
getBaseTime() const
{
   return this->performedLoops;
   return baseTime;
}

template< typename Logger >
+11 −19
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@

#pragma once

#include <type_traits>
#include <tuple>

#include <TNL/Timer.h>
#include <TNL/Devices/Cuda.h>
@@ -27,12 +27,13 @@ template< typename Device >
class FunctionTimer
{
public:
   // returns a pair of (mean, stddev) where mean is the arithmetic mean of the
   // returns a tuple of (loops, mean, stddev) where loops is the number of
   // performed loops (i.e. timing samples), mean is the arithmetic mean of the
   // computation times and stddev is the sample standard deviation
   template< typename ComputeFunction,
             typename ResetFunction,
             typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
   std::pair< double, double >
   std::tuple< int, double, double >
   timeFunction( ComputeFunction compute,
                 ResetFunction reset,
                 int maxLoops,
@@ -52,6 +53,7 @@ public:
      Containers::Vector< double > results( maxLoops );
      results.setValue( 0.0 );

      int loops;
      for( loops = 0;
           loops < maxLoops || sum( results ) < minTime;
           loops++ )
@@ -80,23 +82,13 @@ public:
      }

      const double mean = sum( results ) / (double) loops;
      if( loops > 1 ) {
         const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
         return std::make_pair( mean, stddev );
      }
      else {
         const double stddev = std::numeric_limits<double>::quiet_NaN();
         return std::make_pair( mean, stddev );
      }
   }

   int getPerformedLoops() const
   {
      return this->loops;
      double stddev;
      if( loops > 1 )
         stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
      else
         stddev = std::numeric_limits<double>::quiet_NaN();
      return std::make_tuple( loops, mean, stddev );
   }

protected:
   int loops;
};

} // namespace Benchmarks