Loading src/Benchmarks/BLAS/array-operations.h +2 −5 Original line number Diff line number Diff line Loading @@ -116,10 +116,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark, hostArray = hostArray2; }; benchmark.setOperation( "copy (operator=)", 2 * datasetSize ); // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will // complain when compiling without CUDA const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); (void)copyBasetime; // ignore unused variable benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); #ifdef HAVE_CUDA auto copyAssignCudaCuda = [&]() { deviceArray = deviceArray2; Loading @@ -135,7 +132,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark, auto copyAssignCudaHost = [&]() { hostArray = deviceArray; }; benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime ); benchmark.setOperation( "copy (operator=)", datasetSize, benchmark.getBaseTime() ); benchmark.time< Devices::Cuda >( reset1, "CPU->GPU", copyAssignHostCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU->CPU", copyAssignCudaHost ); #endif Loading src/Benchmarks/Benchmarks.h +23 −30 Original line number Diff line number Diff line Loading @@ -36,6 +36,7 @@ struct BenchmarkResult using HeaderElements = typename Logging::HeaderElements; using RowElements = typename Logging::RowElements; int loops = 0; double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); double bandwidth = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -120,12 +121,10 @@ class Benchmark // Times a single ComputeFunction. Subsequent calls implicitly split // the current operation into sub-columns identified by "performer", // which are further split into "bandwidth", "time" and "speedup" columns. // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation) // Also terminates the recursion of the following variadic template. template< typename Device, typename ResetFunction, typename ComputeFunction > double time( ResetFunction reset, void time( ResetFunction reset, const String & performer, ComputeFunction & compute, BenchmarkResult & result ); Loading @@ -133,26 +132,20 @@ class Benchmark template< typename Device, typename ResetFunction, typename ComputeFunction > inline double time( ResetFunction reset, BenchmarkResult time( ResetFunction reset, const String & performer, ComputeFunction & compute ); /*{ BenchmarkResult result; return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result ); }*/ /**** * The same methods as above but without reset function */ // The same methods as above but without the reset function template< typename Device, typename ComputeFunction > double time( const String & performer, void time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ); template< typename Device, typename ComputeFunction > inline double time( const String & performer, BenchmarkResult time( const String & performer, ComputeFunction & compute ); // Adds an error message to the log. Should be called in places where the Loading @@ -163,14 +156,14 @@ class Benchmark SolverMonitorType& getMonitor(); int getPerformedLoops() const; double getBaseTime() const; bool isResetingOn() const; protected: Logger logger; int loops = 1, performedLoops = 0; int loops = 1; double minTime = 0.0; Loading src/Benchmarks/Benchmarks.hpp +14 −15 Original line number Diff line number Diff line Loading @@ -137,7 +137,7 @@ template< typename Logger > template< typename Device, typename ResetFunction, typename ComputeFunction > double void Benchmark< Logger >:: time( ResetFunction reset, const String & performer, Loading @@ -157,12 +157,11 @@ time( ResetFunction reset, std::string errorMessage; try { if( this->reset ) std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor ); std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor ); else { auto noReset = [] () {}; std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor ); std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor ); } this->performedLoops = functionTimer.getPerformedLoops(); } catch ( const std::exception& e ) { errorMessage = "timeFunction failed due to a C++ exception with description: " + std::string(e.what()); Loading @@ -175,47 +174,47 @@ time( ResetFunction reset, this->baseTime = result.time; logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints(), errorMessage ); return this->baseTime; } template< typename Logger > template< typename Device, typename ResetFunction, typename ComputeFunction > inline double BenchmarkResult Benchmark< Logger >:: time( ResetFunction reset, const String& performer, ComputeFunction& compute ) { BenchmarkResult result; return time< Device >( reset, performer, compute, result ); time< Device >( reset, performer, compute, result ); return result; } template< typename Logger > template< typename Device, typename ComputeFunction > double void Benchmark< Logger >:: time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ) { auto noReset = [] () {}; return time< Device >( noReset, performer, compute, result ); time< Device >( noReset, performer, compute, result ); } template< typename Logger > template< typename Device, typename ComputeFunction > inline double BenchmarkResult Benchmark< Logger >:: time( const String & performer, ComputeFunction & compute ) { BenchmarkResult result; return time< Device >( performer, compute, result ); time< Device >( performer, compute, result ); return result; } template< typename Logger > Loading Loading @@ -244,11 +243,11 @@ getMonitor() -> SolverMonitorType& } template< typename Logger > int double Benchmark< Logger >:: getPerformedLoops() const getBaseTime() const { return this->performedLoops; return baseTime; } template< typename Logger > Loading src/Benchmarks/FunctionTimer.h +11 −19 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #pragma once #include <type_traits> #include <tuple> #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> Loading @@ -27,12 +27,13 @@ template< typename Device > class FunctionTimer { public: // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // returns a tuple of (loops, mean, stddev) where loops is the number of // performed loops (i.e. timing samples), mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename ResetFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > std::pair< double, double > std::tuple< int, double, double > timeFunction( ComputeFunction compute, ResetFunction reset, int maxLoops, Loading @@ -52,6 +53,7 @@ public: Containers::Vector< double > results( maxLoops ); results.setValue( 0.0 ); int loops; for( loops = 0; loops < maxLoops || sum( results ) < minTime; loops++ ) Loading Loading @@ -80,23 +82,13 @@ public: } const double mean = sum( results ) / (double) loops; if( loops > 1 ) { const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); return std::make_pair( mean, stddev ); } else { const double stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_pair( mean, stddev ); } } int getPerformedLoops() const { return this->loops; double stddev; if( loops > 1 ) stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); else stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_tuple( loops, mean, stddev ); } protected: int loops; }; } // namespace Benchmarks Loading Loading
src/Benchmarks/BLAS/array-operations.h +2 −5 Original line number Diff line number Diff line Loading @@ -116,10 +116,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark, hostArray = hostArray2; }; benchmark.setOperation( "copy (operator=)", 2 * datasetSize ); // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will // complain when compiling without CUDA const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); (void)copyBasetime; // ignore unused variable benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); #ifdef HAVE_CUDA auto copyAssignCudaCuda = [&]() { deviceArray = deviceArray2; Loading @@ -135,7 +132,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark, auto copyAssignCudaHost = [&]() { hostArray = deviceArray; }; benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime ); benchmark.setOperation( "copy (operator=)", datasetSize, benchmark.getBaseTime() ); benchmark.time< Devices::Cuda >( reset1, "CPU->GPU", copyAssignHostCuda ); benchmark.time< Devices::Cuda >( reset1, "GPU->CPU", copyAssignCudaHost ); #endif Loading
src/Benchmarks/Benchmarks.h +23 −30 Original line number Diff line number Diff line Loading @@ -36,6 +36,7 @@ struct BenchmarkResult using HeaderElements = typename Logging::HeaderElements; using RowElements = typename Logging::RowElements; int loops = 0; double time = std::numeric_limits<double>::quiet_NaN(); double stddev = std::numeric_limits<double>::quiet_NaN(); double bandwidth = std::numeric_limits<double>::quiet_NaN(); Loading Loading @@ -120,12 +121,10 @@ class Benchmark // Times a single ComputeFunction. Subsequent calls implicitly split // the current operation into sub-columns identified by "performer", // which are further split into "bandwidth", "time" and "speedup" columns. // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation) // Also terminates the recursion of the following variadic template. template< typename Device, typename ResetFunction, typename ComputeFunction > double time( ResetFunction reset, void time( ResetFunction reset, const String & performer, ComputeFunction & compute, BenchmarkResult & result ); Loading @@ -133,26 +132,20 @@ class Benchmark template< typename Device, typename ResetFunction, typename ComputeFunction > inline double time( ResetFunction reset, BenchmarkResult time( ResetFunction reset, const String & performer, ComputeFunction & compute ); /*{ BenchmarkResult result; return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result ); }*/ /**** * The same methods as above but without reset function */ // The same methods as above but without the reset function template< typename Device, typename ComputeFunction > double time( const String & performer, void time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ); template< typename Device, typename ComputeFunction > inline double time( const String & performer, BenchmarkResult time( const String & performer, ComputeFunction & compute ); // Adds an error message to the log. Should be called in places where the Loading @@ -163,14 +156,14 @@ class Benchmark SolverMonitorType& getMonitor(); int getPerformedLoops() const; double getBaseTime() const; bool isResetingOn() const; protected: Logger logger; int loops = 1, performedLoops = 0; int loops = 1; double minTime = 0.0; Loading
src/Benchmarks/Benchmarks.hpp +14 −15 Original line number Diff line number Diff line Loading @@ -137,7 +137,7 @@ template< typename Logger > template< typename Device, typename ResetFunction, typename ComputeFunction > double void Benchmark< Logger >:: time( ResetFunction reset, const String & performer, Loading @@ -157,12 +157,11 @@ time( ResetFunction reset, std::string errorMessage; try { if( this->reset ) std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor ); std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor ); else { auto noReset = [] () {}; std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor ); std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor ); } this->performedLoops = functionTimer.getPerformedLoops(); } catch ( const std::exception& e ) { errorMessage = "timeFunction failed due to a C++ exception with description: " + std::string(e.what()); Loading @@ -175,47 +174,47 @@ time( ResetFunction reset, this->baseTime = result.time; logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints(), errorMessage ); return this->baseTime; } template< typename Logger > template< typename Device, typename ResetFunction, typename ComputeFunction > inline double BenchmarkResult Benchmark< Logger >:: time( ResetFunction reset, const String& performer, ComputeFunction& compute ) { BenchmarkResult result; return time< Device >( reset, performer, compute, result ); time< Device >( reset, performer, compute, result ); return result; } template< typename Logger > template< typename Device, typename ComputeFunction > double void Benchmark< Logger >:: time( const String & performer, ComputeFunction & compute, BenchmarkResult & result ) { auto noReset = [] () {}; return time< Device >( noReset, performer, compute, result ); time< Device >( noReset, performer, compute, result ); } template< typename Logger > template< typename Device, typename ComputeFunction > inline double BenchmarkResult Benchmark< Logger >:: time( const String & performer, ComputeFunction & compute ) { BenchmarkResult result; return time< Device >( performer, compute, result ); time< Device >( performer, compute, result ); return result; } template< typename Logger > Loading Loading @@ -244,11 +243,11 @@ getMonitor() -> SolverMonitorType& } template< typename Logger > int double Benchmark< Logger >:: getPerformedLoops() const getBaseTime() const { return this->performedLoops; return baseTime; } template< typename Logger > Loading
src/Benchmarks/FunctionTimer.h +11 −19 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #pragma once #include <type_traits> #include <tuple> #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> Loading @@ -27,12 +27,13 @@ template< typename Device > class FunctionTimer { public: // returns a pair of (mean, stddev) where mean is the arithmetic mean of the // returns a tuple of (loops, mean, stddev) where loops is the number of // performed loops (i.e. timing samples), mean is the arithmetic mean of the // computation times and stddev is the sample standard deviation template< typename ComputeFunction, typename ResetFunction, typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > > std::pair< double, double > std::tuple< int, double, double > timeFunction( ComputeFunction compute, ResetFunction reset, int maxLoops, Loading @@ -52,6 +53,7 @@ public: Containers::Vector< double > results( maxLoops ); results.setValue( 0.0 ); int loops; for( loops = 0; loops < maxLoops || sum( results ) < minTime; loops++ ) Loading Loading @@ -80,23 +82,13 @@ public: } const double mean = sum( results ) / (double) loops; if( loops > 1 ) { const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); return std::make_pair( mean, stddev ); } else { const double stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_pair( mean, stddev ); } } int getPerformedLoops() const { return this->loops; double stddev; if( loops > 1 ) stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean ); else stddev = std::numeric_limits<double>::quiet_NaN(); return std::make_tuple( loops, mean, stddev ); } protected: int loops; }; } // namespace Benchmarks Loading