Commit 2220c328 authored by Tomáš Oberhuber
Browse files

Added check of the benchmark results.

parent cdbedfa4
Loading
Loading
Loading
Loading
+28 −14
Original line number Diff line number Diff line
@@ -202,33 +202,35 @@ public:
         BenchmarkResult & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      FunctionTimer< Device > functionTimer;
      try {
         if( verbose > 1 ) {
            // run the monitor main loop
            Solvers::SolverMonitorThread monitor_thread( monitor );
            if( this->timing )
               if( this->reset )
                  result.time = FunctionTimer< Device, true >::timeFunction( compute, reset, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< true >( compute, reset, loops, minTime, verbose, monitor );
               else
                  result.time = FunctionTimer< Device, true >::timeFunction( compute, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< true >( compute, loops, minTime, verbose, monitor );
            else
               if( this->reset )
                  result.time = FunctionTimer< Device, false >::timeFunction( compute, reset, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< false >( compute, reset, loops, minTime, verbose, monitor );
               else
                  result.time = FunctionTimer< Device, false >::timeFunction( compute, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< false >( compute, loops, minTime, verbose, monitor );
         }
         else {
            if( this->timing )
               if( this->reset )
                  result.time = FunctionTimer< Device, true >::timeFunction( compute, reset, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< true >( compute, reset, loops, minTime, verbose, monitor );
               else
                  result.time = FunctionTimer< Device, true >::timeFunction( compute, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< true >( compute, loops, minTime, verbose, monitor );
            else
               if( this->reset )
                  result.time = FunctionTimer< Device, false >::timeFunction( compute, reset, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< false >( compute, reset, loops, minTime, verbose, monitor );
               else
                  result.time = FunctionTimer< Device, false >::timeFunction( compute, loops, minTime, verbose, monitor );
                  result.time = functionTimer. template timeFunction< false >( compute, loops, minTime, verbose, monitor );
         }
         this->performedLoops = functionTimer.getPerformedLoops();
      }
      catch ( const std::exception& e ) {
         std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
@@ -269,24 +271,25 @@ public:
         BenchmarkResult & result )
   {
      result.time = std::numeric_limits<double>::quiet_NaN();
      FunctionTimer< Device > functionTimer;
      try {
         if( verbose > 1 ) {
            // run the monitor main loop
            Solvers::SolverMonitorThread monitor_thread( monitor );
            if( this->timing )
               result.time = FunctionTimer< Device, true >::timeFunction( compute, loops, minTime, verbose, monitor );
               result.time = functionTimer. template timeFunction< true >( compute, loops, minTime, verbose, monitor );
            else
               result.time = FunctionTimer< Device, false >::timeFunction( compute, loops, minTime, verbose, monitor );
               result.time = functionTimer. template timeFunction< false >( compute, loops, minTime, verbose, monitor );
         }
         else {
            if( this->timing )
               result.time = FunctionTimer< Device, true >::timeFunction( compute, loops, minTime, verbose, monitor );
               result.time = functionTimer. template timeFunction< true >( compute, loops, minTime, verbose, monitor );
            else
               result.time = FunctionTimer< Device, false >::timeFunction( compute, loops, minTime, verbose, monitor );
               result.time = functionTimer. template timeFunction< false >( compute, loops, minTime, verbose, monitor );
         }
      }
      catch ( const std::exception& e ) {
         std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
         std::cerr << "Function timer failed due to a C++ exception with description: " << e.what() << std::endl;
      }

      result.bandwidth = datasetSize / result.time;
@@ -320,6 +323,7 @@ public:
      // each computation has 3 subcolumns
      const int colspan = 3 * numberOfComputations;
      writeErrorMessage( msg, colspan );
      std::cerr << msg << std::endl;
   }

   using Logging::save;
@@ -330,8 +334,18 @@ public:
      return monitor;
   }

   int getPerformedLoops() const
   {
      return this->performedLoops;
   }

   bool isResetingOn() const
   {
      return reset;
   }

protected:
   int loops = 1;
   int loops = 1, performedLoops = 0;
   double minTime = 0.0;
   double datasetSize = 0.0;
   double baseTime = 0.0;
+17 −10
Original line number Diff line number Diff line
@@ -22,17 +22,17 @@ namespace TNL {
   namespace Benchmarks {


template< typename Device,
          bool timing >
template< typename Device >
class FunctionTimer
{
   public:
      using DeviceType = Device;

      template< typename ComputeFunction,
      template< bool timing,
                typename ComputeFunction,
                typename ResetFunction,
                typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
      static double
      double
      timeFunction( ComputeFunction compute,
                    ResetFunction reset,
                    int maxLoops,
@@ -52,7 +52,6 @@ class FunctionTimer
         reset();
         compute();

         int loops;
         // If we do not perform reset function and don't need
         // the monitor, the timer is not interrupted after each loop.
         if( ! performReset && verbose < 2 )
@@ -85,7 +84,6 @@ class FunctionTimer
            {
               // abuse the monitor's "time" for loops
               monitor.setTime( loops + 1 );

               reset();

               // Explicit synchronization of the CUDA device
@@ -104,15 +102,17 @@ class FunctionTimer
                  timer.stop();
            }
         }
         std::cerr << loops << std::endl;
         if( timing )
            return timer.getRealTime() / ( double ) loops;
         else
            return std::numeric_limits<double>::quiet_NaN();
      }

      template< typename ComputeFunction,
      template< bool timing,
                typename ComputeFunction,
                typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
      static double
      double
      timeFunction( ComputeFunction compute,
                    int maxLoops,
                    const double& minTime,
@@ -120,8 +120,15 @@ class FunctionTimer
                    Monitor && monitor = Monitor() )
      {
         auto noReset = [] () {};
         return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor, false );
         return timeFunction< timing >( compute, noReset, maxLoops, minTime, verbose, monitor, false );
      }

      /**
       * \brief Returns the value of the loop counter kept by this timer.
       *
       * The counter starts at zero, so the getter yields a well-defined
       * value even when it is called before any timeFunction() call has
       * stored a count.
       */
      int getPerformedLoops() const
      {
         return this->loops;
      }
      protected:
         // Loop counter exposed through getPerformedLoops(). It is
         // zero-initialized so that it is never read while indeterminate.
         int loops = 0;
};

   } // namespace Benchmarks
+9 −1
Original line number Diff line number Diff line
@@ -54,12 +54,12 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
       userData( this->u )
      {
         v_data = v.getData();
         u->getData().bind( v );
      }

      /**
       * \brief Sets the benchmark data back to 0.0.
       *
       * NOTE(review): the constructor calls u->getData().bind( v ). If that
       * makes both objects refer to the same storage, one of the two
       * setValue calls below is redundant — confirm which one is meant to stay.
       */
      void reset()
      {
         v.setValue( 0.0 );
         u->getData().setValue( 0.0 );
      };

      void addOneUsingPureC()
@@ -146,6 +146,14 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
            size );*/
      }

      bool checkAddOne( int loops, bool reseting )
      {
         std::cout << loops << " -> " << v << std::endl;
         if( reseting )
            return v.containsOnlyValue( 1.0 );
         return v.containsOnlyValue( ( Real ) loops );
      }

      void traverseUsingPureC()
      {
         if( std::is_same< Device, Devices::Host >::value )
+9 −2
Original line number Diff line number Diff line
@@ -52,12 +52,12 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
       userData( u )
      {
         v_data = v.getData();
         u->getData().bind( v );
      }

      /**
       * \brief Sets the benchmark data back to 0.0.
       *
       * NOTE(review): the constructor calls u->getData().bind( v ). If that
       * makes both objects refer to the same storage, one of the two
       * setValue calls below is redundant — confirm which one is meant to stay.
       */
      void reset()
      {
         v.setValue( 0.0 );
         u->getData().setValue( 0.0 );
      };

      void addOneUsingPureC()
@@ -71,7 +71,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
         else // Device == Devices::Cuda
         {
#ifdef HAVE_CUDA
            dim3 blockSize( 256 ), blocksCount, gridsCount;
            dim3 blockSize( 16, 16 ), blocksCount, gridsCount;
            Devices::Cuda::setupThreads(
               blockSize,
               blocksCount,
@@ -183,6 +183,13 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
            }*/
      }

      bool checkAddOne( int loops, bool reseting )
      {
         if( reseting )
            return v.containsOnlyValue( 1.0 );
         return v.containsOnlyValue( ( Real ) loops );
      }

      void traverseUsingPureC()
      {
         if( std::is_same< Device, Devices::Host >::value )
+9 −3
Original line number Diff line number Diff line
@@ -58,12 +58,12 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
        userData( u )
      {
         v_data = v.getData();
         u->getData().bind( v );
      }

      /**
       * \brief Sets the benchmark data back to 0.0.
       *
       * NOTE(review): the constructor calls u->getData().bind( v ). If that
       * makes both objects refer to the same storage, one of the two
       * setValue calls below is redundant — confirm which one is meant to stay.
       */
      void reset()
      {
         v.setValue( 0.0 );
         u->getData().setValue( 0.0 );
      };

      void addOneUsingPureC()
@@ -78,7 +78,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
         else // Device == Devices::Cuda
         {
#ifdef HAVE_CUDA
            dim3 blockSize( 256 ), blocksCount, gridsCount;
            dim3 blockSize( 32, 4, 2 ), blocksCount, gridsCount;
            Devices::Cuda::setupThreads(
               blockSize,
               blocksCount,
@@ -174,13 +174,19 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
            f, v.getData() );
      }


      /**
       * \brief Runs the add-one pass through the grid traverser.
       *
       * Invokes traverser.processAllEntities with AddOneEntitiesProcessorType
       * as the entity processor, passing the grid and the user data.
       */
      void addOneUsingTraverser()
      {
         traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >( grid, userData );
      }

      bool checkAddOne( int loops, bool reseting )
      {
         if( reseting )
            return v.containsOnlyValue( 1.0 );
         return v.containsOnlyValue( ( Real ) loops );
      }

      void traverseUsingPureC()
      {
         if( std::is_same< Device, Devices::Host >::value )
Loading