Loading src/Benchmarks/Benchmarks.h +12 −11 Original line number Diff line number Diff line Loading @@ -17,7 +17,6 @@ #include "Logging.h" #include <iostream> #include <iomanip> #include <exception> #include <limits> Loading @@ -35,24 +34,24 @@ namespace Benchmarks { const double oneGB = 1024.0 * 1024.0 * 1024.0; struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; double bandwidth = std::numeric_limits<double>::quiet_NaN(); double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); double bandwidth = std::numeric_limits<double>::quiet_NaN(); double speedup = std::numeric_limits<double>::quiet_NaN(); virtual HeaderElements getTableHeader() const { return HeaderElements({"bandwidth", "time", "speedup"}); return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup" }); } virtual RowElements getRowElements() const { return RowElements({ bandwidth, time, speedup }); return RowElements({ time, stddev, stddev / time, bandwidth, speedup }); } }; Loading Loading @@ -200,21 +199,22 @@ public: BenchmarkResult & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); FunctionTimer< Device > functionTimer; try { if( verbose > 1 ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); if( this->reset ) result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); else result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } else { if( this->reset ) result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); else result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } this->performedLoops = functionTimer.getPerformedLoops(); } Loading Loading @@ -257,15 +257,16 @@ public: BenchmarkResult & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); FunctionTimer< Device > functionTimer; try { if( verbose > 1 ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } else { result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } } catch ( const std::exception& e ) { Loading src/Benchmarks/FunctionTimer.h +77 −83 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> #include <TNL/Solvers/IterativeSolverMonitor.h> namespace TNL { Loading @@ -26,19 +27,18 @@ template< typename Device > class FunctionTimer { public: using DeviceType = Device; // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename ResetFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > double std::pair< double, double > timeFunction( ComputeFunction compute, ResetFunction reset, int maxLoops, const double& minTime, int verbose = 1, Monitor && monitor = Monitor(), bool performReset = true ) Monitor && monitor = Monitor() ) { // the timer is constructed zero-initialized and stopped Timer timer; Loading @@ -51,32 +51,11 @@ class FunctionTimer reset(); compute(); // If we do not perform reset function and don't need // the monitor, the timer is not interrupted after each loop. if( ! performReset && verbose < 2 ) { // Explicit synchronization of the CUDA device #ifdef HAVE_CUDA if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif timer.start(); Containers::Vector< double > results( maxLoops ); results.setValue( 0.0 ); for( loops = 0; loops < maxLoops || timer.getRealTime() < minTime; loops++ ) compute(); // Explicit synchronization of the CUDA device #ifdef HAVE_CUDA if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif timer.stop(); } else { for( loops = 0; loops < maxLoops || timer.getRealTime() < minTime; loops < maxLoops || sum( results ) < minTime; loops++ ) { // abuse the monitor's "time" for loops Loading @@ -88,6 +67,9 @@ class FunctionTimer if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif // reset timer before each computation timer.reset(); timer.start(); compute(); #ifdef HAVE_CUDA Loading @@ -95,14 +77,26 @@ class FunctionTimer cudaDeviceSynchronize(); #endif timer.stop(); results[ loops ] = timer.getRealTime(); } const double mean = sum( results ) / (double) loops; if( loops > 1 ) { const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); return std::make_pair( mean, stddev ); } else { const double stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_pair( mean, stddev ); } return timer.getRealTime() / ( double ) loops; } // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > double std::pair< double, double > timeFunction( ComputeFunction compute, int maxLoops, const double& minTime, Loading @@ -110,7 +104,7 @@ class FunctionTimer Monitor && monitor = Monitor() ) { auto noReset = [] () {}; return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor, false ); return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor ); } int getPerformedLoops() const Loading src/Benchmarks/Logging.h +182 −183 Original line number Diff line number Diff line Loading @@ -16,9 +16,12 @@ #include <map> #include <vector> #include <iostream> #include <iomanip> #include <string> #include <sstream> #include <TNL/String.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -209,7 +212,6 @@ class Logging } protected: // manual double -> String conversion with fixed precision static String _to_string( double num, int precision = 0, bool fixed = false ) Loading @@ -233,8 +235,5 @@ class Logging std::vector< std::pair< String, int > > horizontalGroups; }; } // namespace Benchmarks } // namespace TNL Loading
src/Benchmarks/Benchmarks.h +12 −11 Original line number Diff line number Diff line Loading @@ -17,7 +17,6 @@ #include "Logging.h" #include <iostream> #include <iomanip> #include <exception> #include <limits> Loading @@ -35,24 +34,24 @@ namespace Benchmarks { const double oneGB = 1024.0 * 1024.0 * 1024.0; struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; double bandwidth = std::numeric_limits<double>::quiet_NaN(); double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); double bandwidth = std::numeric_limits<double>::quiet_NaN(); double speedup = std::numeric_limits<double>::quiet_NaN(); virtual HeaderElements getTableHeader() const { return HeaderElements({"bandwidth", "time", "speedup"}); return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup" }); } virtual RowElements getRowElements() const { return RowElements({ bandwidth, time, speedup }); return RowElements({ time, stddev, stddev / time, bandwidth, speedup }); } }; Loading Loading @@ -200,21 +199,22 @@ public: BenchmarkResult & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); FunctionTimer< Device > functionTimer; try { if( verbose > 1 ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); if( this->reset ) result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); else result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } else { if( this->reset ) result.time = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, verbose, monitor ); else result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } this->performedLoops = functionTimer.getPerformedLoops(); } Loading Loading @@ -257,15 +257,16 @@ public: BenchmarkResult & result ) { result.time = std::numeric_limits<double>::quiet_NaN(); result.stddev = std::numeric_limits<double>::quiet_NaN(); FunctionTimer< Device > functionTimer; try { if( verbose > 1 ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } else { result.time = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, verbose, monitor ); } } catch ( const std::exception& e ) { Loading
src/Benchmarks/FunctionTimer.h +77 −83 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> #include <TNL/Solvers/IterativeSolverMonitor.h> namespace TNL { Loading @@ -26,19 +27,18 @@ template< typename Device > class FunctionTimer { public: using DeviceType = Device; // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename ResetFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > double std::pair< double, double > timeFunction( ComputeFunction compute, ResetFunction reset, int maxLoops, const double& minTime, int verbose = 1, Monitor && monitor = Monitor(), bool performReset = true ) Monitor && monitor = Monitor() ) { // the timer is constructed zero-initialized and stopped Timer timer; Loading @@ -51,32 +51,11 @@ class FunctionTimer reset(); compute(); // If we do not perform reset function and don't need // the monitor, the timer is not interrupted after each loop. if( ! performReset && verbose < 2 ) { // Explicit synchronization of the CUDA device #ifdef HAVE_CUDA if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif timer.start(); Containers::Vector< double > results( maxLoops ); results.setValue( 0.0 ); for( loops = 0; loops < maxLoops || timer.getRealTime() < minTime; loops++ ) compute(); // Explicit synchronization of the CUDA device #ifdef HAVE_CUDA if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif timer.stop(); } else { for( loops = 0; loops < maxLoops || timer.getRealTime() < minTime; loops < maxLoops || sum( results ) < minTime; loops++ ) { // abuse the monitor's "time" for loops Loading @@ -88,6 +67,9 @@ class FunctionTimer if( std::is_same< Device, Devices::Cuda >::value ) cudaDeviceSynchronize(); #endif // reset timer before each computation timer.reset(); timer.start(); compute(); #ifdef HAVE_CUDA Loading @@ -95,14 +77,26 @@ class FunctionTimer cudaDeviceSynchronize(); #endif timer.stop(); results[ loops ] = timer.getRealTime(); } const double mean = sum( results ) / (double) loops; if( loops > 1 ) { const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); return std::make_pair( mean, stddev ); } else { const double stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_pair( mean, stddev ); } return timer.getRealTime() / ( double ) loops; } // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > double std::pair< double, double > timeFunction( ComputeFunction compute, int maxLoops, const double& minTime, Loading @@ -110,7 +104,7 @@ class FunctionTimer Monitor && monitor = Monitor() ) { auto noReset = [] () {}; return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor, false ); return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor ); } int getPerformedLoops() const Loading
src/Benchmarks/Logging.h +182 −183 Original line number Diff line number Diff line Loading @@ -16,9 +16,12 @@ #include <map> #include <vector> #include <iostream> #include <iomanip> #include <string> #include <sstream> #include <TNL/String.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -209,7 +212,6 @@ class Logging } protected: // manual double -> String conversion with fixed precision static String _to_string( double num, int precision = 0, bool fixed = false ) Loading @@ -233,8 +235,5 @@ class Logging std::vector< std::pair< String, int > > horizontalGroups; }; } // namespace Benchmarks } // namespace TNL