Commit 2bea9311 authored by Jakub Klinkovský
Browse files

Benchmarks: compute sample standard deviation of the measured computation times

parent e6e6cf46
Loading
Loading
Loading
Loading
+12 −11
Original line number Diff line number Diff line
@@ -17,7 +17,6 @@
#include "Logging.h"

#include <iostream>
#include <iomanip>
#include <exception>
#include <limits>

@@ -35,24 +34,24 @@ namespace Benchmarks {
// 1024^3 (= 2^30) stored as a double; presumably the number of bytes in one
// binary gigabyte, used to convert byte counts to GB -- confirm against callers.
const double oneGB = 1024.0 * 1024.0 * 1024.0;



// Result of a single benchmark measurement together with the description of
// how it is rendered as one row of the results table.
//
// Every quantity defaults to a quiet NaN so that a value which was never
// measured (or whose measurement failed) is clearly distinguishable from a
// real number in the output.
//
// The two virtual methods must stay in sync: the i-th element returned by
// getRowElements() is the value for the i-th column name returned by
// getTableHeader().
struct BenchmarkResult
{
   using HeaderElements = Logging::HeaderElements;
   using RowElements = Logging::RowElements;

   // measured computation time
   double time = std::numeric_limits<double>::quiet_NaN();
   // standard deviation of the measured computation time
   double stddev = std::numeric_limits<double>::quiet_NaN();
   // achieved bandwidth
   double bandwidth = std::numeric_limits<double>::quiet_NaN();
   // speedup (NOTE(review): presumably relative to a baseline result -- confirm)
   double speedup = std::numeric_limits<double>::quiet_NaN();

   // Returns the column names, in the same order as the values produced by
   // getRowElements().
   virtual HeaderElements getTableHeader() const
   {
      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup" });
   }

   // Returns the values of one table row. The third column is the relative
   // standard deviation (stddev / time); it is NaN whenever either operand
   // is NaN.
   virtual RowElements getRowElements() const
   {
      return RowElements({ time, stddev, stddev / time, bandwidth, speedup });
   }
};

@@ -200,21 +199,22 @@ public:
         BenchmarkResult & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      result.stddev = std::numeric_limits<double>::quiet_NaN();
      FunctionTimer< Device > functionTimer;
      try {
         if( verbose > 1 ) {
            // run the monitor main loop
            Solvers::SolverMonitorThread monitor_thread( monitor );
            if( this->reset )
               result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor );
               std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor );
            else
               result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
               std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
         }
         else {
            if( this->reset )
               result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor );
               std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor );
            else
               result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
               std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
         }
         this->performedLoops = functionTimer.getPerformedLoops();
      }
@@ -257,15 +257,16 @@ public:
         BenchmarkResult & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      result.stddev = std::numeric_limits<double>::quiet_NaN();
      FunctionTimer< Device > functionTimer;
      try {
         if( verbose > 1 ) {
            // run the monitor main loop
            Solvers::SolverMonitorThread monitor_thread( monitor );
            result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
         }
         else {
            result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor );
         }
      }
      catch ( const std::exception& e ) {
+77 −83
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@

#include <TNL/Timer.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Solvers/IterativeSolverMonitor.h>

namespace TNL {
@@ -26,19 +27,18 @@ template< typename Device >
class FunctionTimer
{
public:
      using DeviceType = Device;

   // returns a pair of (mean, stddev) where mean is the arithmetic mean of the
   // computation times and stddev is the sample standard deviation
   template< typename ComputeFunction,
             typename ResetFunction,
             typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
      double
   std::pair< double, double >
   timeFunction( ComputeFunction compute,
                 ResetFunction reset,
                 int maxLoops,
                 const double& minTime,
                 int verbose = 1,
                    Monitor && monitor = Monitor(),
                    bool performReset = true )
                 Monitor && monitor = Monitor() )
   {
      // the timer is constructed zero-initialized and stopped
      Timer timer;
@@ -51,32 +51,11 @@ class FunctionTimer
      reset();
      compute();

         // If we do not perform reset function and don't need
         // the monitor, the timer is not interrupted after each loop.
         if( ! performReset && verbose < 2 )
         {
            // Explicit synchronization of the CUDA device
#ifdef HAVE_CUDA
               if( std::is_same< Device, Devices::Cuda >::value )
                  cudaDeviceSynchronize();
#endif
            timer.start();
      Containers::Vector< double > results( maxLoops );
      results.setValue( 0.0 );

      for( loops = 0;
                 loops < maxLoops || timer.getRealTime() < minTime;
                 loops++ )
               compute();
            // Explicit synchronization of the CUDA device
#ifdef HAVE_CUDA
            if( std::is_same< Device, Devices::Cuda >::value )
               cudaDeviceSynchronize();
#endif
            timer.stop();
         }
         else
         {
            for( loops = 0;
                 loops < maxLoops || timer.getRealTime() < minTime;
           loops < maxLoops || sum( results ) < minTime;
           loops++ )
      {
         // abuse the monitor's "time" for loops
@@ -88,6 +67,9 @@ class FunctionTimer
         if( std::is_same< Device, Devices::Cuda >::value )
            cudaDeviceSynchronize();
#endif

         // reset timer before each computation
         timer.reset();
         timer.start();
         compute();
#ifdef HAVE_CUDA
@@ -95,14 +77,26 @@ class FunctionTimer
            cudaDeviceSynchronize();
#endif
         timer.stop();

         results[ loops ] = timer.getRealTime();
      }

      const double mean = sum( results ) / (double) loops;
      if( loops > 1 ) {
         const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
         return std::make_pair( mean, stddev );
      }
      else {
         const double stddev = std::numeric_limits<double>::quiet_NaN();
         return std::make_pair( mean, stddev );
      }
         return timer.getRealTime() / ( double ) loops;
   }

   // returns a pair of (mean, stddev) where mean is the arithmetic mean of the
   // computation times and stddev is the sample standard deviation
   template< typename ComputeFunction,
             typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
      double
   std::pair< double, double >
   timeFunction( ComputeFunction compute,
                 int maxLoops,
                 const double& minTime,
@@ -110,7 +104,7 @@ class FunctionTimer
                 Monitor && monitor = Monitor() )
   {
      auto noReset = [] () {};
         return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor, false );
      return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor );
   }

   int getPerformedLoops() const
+182 −183
Original line number Diff line number Diff line
@@ -16,9 +16,12 @@
#include <map>
#include <vector>
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>

#include <TNL/String.h>

namespace TNL {
namespace Benchmarks {

@@ -209,7 +212,6 @@ class Logging
   }

protected:

   // manual double -> String conversion with fixed precision
   static String
   _to_string( double num, int precision = 0, bool fixed = false )
@@ -233,8 +235,5 @@ class Logging
   std::vector< std::pair< String, int > > horizontalGroups;
};


} // namespace Benchmarks
} // namespace TNL