From 358923c943a83155ea6f0d195dfd2fe2797e30ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 3 Nov 2021 10:50:16 +0100
Subject: [PATCH 01/40] Fixed includes in benchmarks header files

---
 src/Benchmarks/Benchmark.hpp  | 11 +----------
 src/Benchmarks/Benchmarks.h   |  6 ++----
 src/Benchmarks/CMakeLists.txt |  2 ++
 src/Benchmarks/JsonLogging.h  |  1 +
 src/Benchmarks/Logging.h      |  1 +
 5 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index e2357990a..1aa28d6ea 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -13,20 +13,11 @@
 
 #pragma once
 
+#include "Benchmarks.h"
 #include "FunctionTimer.h"
-#include "Logging.h"
 
 #include <iostream>
 #include <exception>
-#include <limits>
-
-#include <TNL/String.h>
-
-#include <TNL/Devices/Host.h>
-#include <TNL/SystemInfo.h>
-#include <TNL/Cuda/DeviceInfo.h>
-#include <TNL/Config/ConfigDescription.h>
-#include <TNL/MPI/Wrappers.h>
 
 namespace TNL {
 namespace Benchmarks {
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 77fa9e47c..fe957d866 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -13,14 +13,12 @@
 
 #pragma once
 
-#include "FunctionTimer.h"
 #include "Logging.h"
 
-#include <iostream>
-#include <exception>
 #include <limits>
 
 #include <TNL/String.h>
+#include <TNL/Solvers/IterativeSolverMonitor.h>
 
 #include <TNL/Devices/Host.h>
 #include <TNL/SystemInfo.h>
@@ -253,4 +251,4 @@ inline typename Benchmark< Logger >::MetadataMap getHardwareMetadata()
 } // namespace Benchmarks
 } // namespace TNL
 
-#include <Benchmarks/Benchmark.hpp>
+#include "Benchmark.hpp"
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 4e1961b3c..288439a26 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -10,8 +10,10 @@ add_subdirectory( Traversers )
 
 set( headers
          Benchmarks.h
+         Benchmark.hpp
          FunctionTimer.h
          Logging.h
+         JsonLogging.h
 )
 
 install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 7d9817c65..d98643b67 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -13,6 +13,7 @@
 
 #pragma once
 
+#include <list>
 #include <map>
 #include <vector>
 #include <iostream>
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 2c8262d21..544d71662 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -13,6 +13,7 @@
 
 #pragma once
 
+#include <list>
 #include <map>
 #include <vector>
 #include <iostream>
-- 
GitLab


From 5bb353007593381c100649e13cb871e5a1d685e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 3 Nov 2021 16:59:17 +0100
Subject: [PATCH 02/40] Benchmarks: refactored HeaderElements back to plain
 vector of strings

Specifying the widths using pairs is just ugly and backwards
incompatible. Changed to specifying using two vectors.
---
 src/Benchmarks/Benchmarks.h               | 12 +++++-----
 src/Benchmarks/JsonLogging.h              | 27 +++++++++++++++--------
 src/Benchmarks/LinearSolvers/benchmarks.h | 10 +--------
 src/Benchmarks/Logging.h                  |  6 ++---
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h | 17 +++++---------
 src/Benchmarks/SpMV/spmv.h                |  4 ++--
 6 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index fe957d866..5701cf747 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -45,12 +45,12 @@ struct BenchmarkResult
 
    virtual HeaderElements getTableHeader() const
    {
-      return HeaderElements( {
-         std::pair< String, int >( "time", 8 ),
-         std::pair< String, int >( "stddev", 8 ),
-         std::pair< String, int >( "stddev/time", 8 ),
-         std::pair< String, int >( "bandwidth", 8 ),
-         std::pair< String, int >( "speedup", 8 ) } );
+      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup" });
+   }
+
+   virtual std::vector< int > getColumnWidthHints() const
+   {
+      return std::vector< int >({ 8, 8, 8, 8, 8 });
    }
 
    virtual RowElements getRowElements() const
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index d98643b67..6b4bd9c1c 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -84,9 +84,10 @@ public:
    using MetadataColumns = std::vector<MetadataElement>;
 
    using CommonLogs = std::vector< std::pair< const char*, String > >;
-   using LogsMetadata = std::vector< std::pair< String, int > >;
+   using LogsMetadata = HeaderElements;
+   using WidthHints = std::vector< int >;
 
-   using HeaderElements = std::vector< std::pair< String, int > >;
+   using HeaderElements = std::vector< String >;
    using RowElements = JsonLoggingRowElements;
 
    JsonLogging( int verbose = true,
@@ -113,19 +114,25 @@ public:
       }
    };
 
-   void resetLogsMetada() { this->logsMetadata.clear(); };
+   void resetLogsMetada()
+   {
+      this->logsMetadata.clear();
+      this->widthHints.clear();
+   }
 
-   void addLogsMetadata( const std::vector< std::pair< String, int > >& md )
+   void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths )
    {
       this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() );
+      this->widthHints.insert( this->widthHints.end(), widths.begin(), widths.end() );
    }
 
    void writeHeader()
    {
+      TNL_ASSERT_EQ( this->logsMetadata.size(), this->widthHints.size(), "" );
       if( verbose )
       {
-         for( auto md : this->logsMetadata )
-            std::cout << std::setw( md.second ) << md.first;
+         for( std::size_t i = 0; i < this->logsMetadata.size(); i++ )
+            std::cout << std::setw( this->widthHints[ i ] ) << this->logsMetadata[ i ];
          std::cout << std::endl;
       }
    }
@@ -147,14 +154,15 @@ public:
          log << "         \"" << lg.first << "\" : \"" << lg.second << "\"";
       }
 
-      auto md = this->logsMetadata.begin();
+      std::size_t i = 0;
       for( auto el : rowEls )
       {
          if( verbose )
-            std::cout << std::setw( md->second ) << el;
+            std::cout << std::setw( this->widthHints[ i ] ) << el;
          if( idx++ > 0 )
             log << "," << std::endl;
-         log << "         \"" << md++->first << "\" : \"" << el << "\"";
+         log << "         \"" << this->logsMetadata[ i ] << "\" : \"" << el << "\"";
+         i++;
       }
       log << std::endl << "      }";
       this->lineStarted = true;
@@ -256,6 +264,7 @@ protected:
 
    // new JSON implementation
    LogsMetadata logsMetadata;
+   WidthHints widthHints;
    CommonLogs commonLogs;
    String outputMode;
 
diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h
index 59d2ab3de..899ccbd48 100644
--- a/src/Benchmarks/LinearSolvers/benchmarks.h
+++ b/src/Benchmarks/LinearSolvers/benchmarks.h
@@ -145,15 +145,7 @@ benchmarkSolver( Benchmark<>& benchmark,
 
       virtual HeaderElements getTableHeader() const override
       {
-         return HeaderElements( {
-            std::pair< String, int >( "time", 8 ),
-            std::pair< String, int >( "stddev", 8 ),
-            std::pair< String, int >( "stddev/time", 8 ),
-            std::pair< String, int >( "speedup", 8 ),
-            std::pair< String, int >( "converged", 8 ),
-            std::pair< String, int >( "iterations", 8 ),
-            std::pair< String, int >( "residue_precond", 8 ),
-            std::pair< String, int >( "residue_true", 8 ) } );
+         return HeaderElements({ "time", "stddev", "stddev/time", "speedup", "converged", "iterations", "residue_precond", "residue_true" });
       }
 
       virtual RowElements getRowElements() const override
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 544d71662..e10969d3d 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -84,7 +84,7 @@ public:
 
    using CommonLogs = std::vector< std::pair< const char*, String > >;
 
-   using HeaderElements = std::vector< std::pair< String, int > >;
+   using HeaderElements = std::vector< String >;
    using RowElements = LoggingRowElements;
 
    Logging( int verbose = true,
@@ -149,7 +149,7 @@ public:
          std::cout << std::setw( 15 ) << "";
 
          for( auto & it : subElements ) {
-            std::cout << std::setw( 15 ) << it.first;
+            std::cout << std::setw( 15 ) << it;
          }
          std::cout << std::endl;
 
@@ -178,7 +178,7 @@ public:
 
       log << header_indent << " " << spanningElement << std::endl;
       for( auto & it : subElements ) {
-         log << header_indent << "! " << it.first << std::endl;
+         log << header_indent << "! " << it << std::endl;
       }
 
       if( horizontalGroups.size() > 0 ) {
diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 61fae4f60..9475c9ba5 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -45,17 +45,12 @@ struct SpmvBenchmarkResult
 
    virtual HeaderElements getTableHeader() const override
    {
-      return HeaderElements( {
-         std::pair< String, int >( "format", 35 ),
-         std::pair< String, int >( "device", 12 ),
-         std::pair< String, int >( "non-zeros", 12 ),
-         std::pair< String, int >( "time", 12 ),
-         std::pair< String, int >( "stddev", 12 ),
-         std::pair< String, int >( "stddev/time", 14 ),
-         std::pair< String, int >( "bandwidth", 12 ),
-         std::pair< String, int >( "speedup", 12 ),
-         std::pair< String, int >( "CSR Diff.Max", 14 ),
-         std::pair< String, int >( "CSR Diff.L2", 14 ) } );
+      return HeaderElements({ "format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2" });
+   }
+
+   virtual std::vector< int > getColumnWidthHints() const override
+   {
+      return std::vector< int >({ 35, 12, 12, 12, 12, 14, 12, 12, 14, 14 });
    }
 
    void setFormat( const String& format ) { this->format = format; };
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 559adadff..49c811a81 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -611,7 +611,7 @@ benchmarkSpmv( BenchmarkType& benchmark,
    };
 
    SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader() );
+   benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader(), csrBenchmarkResults.getColumnWidthHints() );
    benchmark.writeHeader();
    benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults );
 
@@ -641,7 +641,7 @@ benchmarkSpmv( BenchmarkType& benchmark,
    };
 
    SpmvBenchmarkResult< Real, Devices::Host, int > petscBenchmarkResults( String( "Petsc" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   //benchmark.addLogsMetadata( petscBenchmarkResults.getTableHeader() );
+   //benchmark.addLogsMetadata( petscBenchmarkResults.getTableHeader(), petscBenchmarkResults.getColumnWidthHints() );
    //benchmark.writeHeader();
    benchmark.time< Devices::Host >( resetPetscVectors, "", petscSpmvCSRHost, petscBenchmarkResults );
 #endif
-- 
GitLab


From a65443031424da056af3366da1451bd939f59eea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 3 Nov 2021 18:00:02 +0100
Subject: [PATCH 03/40] Benchmarks: removed useless class
 JsonLoggingRowElements

---
 src/Benchmarks/JsonLogging.h | 66 +++---------------------------------
 src/Benchmarks/Logging.h     |  6 ++--
 2 files changed, 8 insertions(+), 64 deletions(-)

diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 6b4bd9c1c..431daf90b 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -13,69 +13,11 @@
 
 #pragma once
 
-#include <list>
-#include <map>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include <string>
-#include <sstream>
-
-#include <TNL/String.h>
+#include "Logging.h"
 
 namespace TNL {
 namespace Benchmarks {
 
-class JsonLoggingRowElements
-{
-   public:
-
-      JsonLoggingRowElements()
-      {
-         stream << std::setprecision( 6 ) << std::fixed;
-      }
-
-      template< typename T >
-      JsonLoggingRowElements& operator << ( const T& b )
-      {
-         stream << b;
-         elements.push_back( stream.str() );
-         stream.str( std::string() );
-         return *this;
-      }
-
-      JsonLoggingRowElements& operator << ( decltype( std::setprecision( 2 ) )& setprec )
-      {
-         stream << setprec;
-         return *this;
-      }
-
-      JsonLoggingRowElements& operator << ( decltype( std::fixed )& setfixed ) // the same works also for std::scientific
-      {
-         stream << setfixed;
-         return *this;
-      }
-
-      // iterators
-      auto begin() noexcept { return elements.begin(); }
-
-      auto begin() const noexcept { return elements.begin(); }
-
-      auto cbegin() const noexcept { return elements.cbegin(); }
-
-      auto end() noexcept { return elements.end(); }
-
-      auto end() const noexcept { return elements.end(); }
-
-      auto cend() const noexcept { return elements.cend(); }
-
-      size_t size() const noexcept { return this->elements.size(); };
-   protected:
-      std::list< String > elements;
-
-      std::stringstream stream;
-};
-
 class JsonLogging
 {
 public:
@@ -83,13 +25,13 @@ public:
    using MetadataMap = std::map< const char*, String >;
    using MetadataColumns = std::vector<MetadataElement>;
 
+   using HeaderElements = std::vector< String >;
+   using RowElements = LoggingRowElements;
+
    using CommonLogs = std::vector< std::pair< const char*, String > >;
    using LogsMetadata = HeaderElements;
    using WidthHints = std::vector< int >;
 
-   using HeaderElements = std::vector< String >;
-   using RowElements = JsonLoggingRowElements;
-
    JsonLogging( int verbose = true,
                 String outputMode = "",
                 bool logFileAppend = false )
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index e10969d3d..72445b4e0 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -56,6 +56,8 @@ class LoggingRowElements
          return *this;
       }
 
+      std::size_t size() const noexcept { return elements.size(); };
+
       // iterators
       auto begin() noexcept { return elements.begin(); }
 
@@ -82,11 +84,11 @@ public:
    using MetadataMap = std::map< const char*, String >;
    using MetadataColumns = std::vector<MetadataElement>;
 
-   using CommonLogs = std::vector< std::pair< const char*, String > >;
-
    using HeaderElements = std::vector< String >;
    using RowElements = LoggingRowElements;
 
+   using CommonLogs = std::vector< std::pair< const char*, String > >;
+
    Logging( int verbose = true,
             String outputMode = "",
             bool logFileAppend = false )
-- 
GitLab


From 4fbfbb48872ec34a59d01c7c82545b586ca1b94f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 3 Nov 2021 18:19:37 +0100
Subject: [PATCH 04/40] Benchmarks: refactoring to avoid useless templates on
 BenchmarkResult

---
 src/Benchmarks/BLAS/tnl-benchmark-blas.h      |   2 +-
 src/Benchmarks/Benchmark.hpp                  |   8 +-
 src/Benchmarks/Benchmarks.h                   |  20 +-
 src/Benchmarks/CMakeLists.txt                 |   1 +
 src/Benchmarks/CustomLogging.h                | 229 ++++++++++++++++++
 .../DistSpMV/tnl-benchmark-distributed-spmv.h |   2 +-
 src/Benchmarks/JsonLogging.h                  |  55 ++---
 src/Benchmarks/LinearSolvers/benchmarks.h     |   2 +-
 .../tnl-benchmark-linear-solvers.h            |   2 +-
 src/Benchmarks/Logging.h                      | 208 ++--------------
 .../NDArray/tnl-benchmark-ndarray-boundary.h  |   2 +-
 .../NDArray/tnl-benchmark-ndarray.h           |   2 +-
 .../ODESolvers/tnl-benchmark-ode-solvers.h    |   2 +-
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h     |  14 +-
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h      |   2 +-
 15 files changed, 296 insertions(+), 255 deletions(-)
 create mode 100644 src/Benchmarks/CustomLogging.h

diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index 9b061adf6..bf2708112 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -185,7 +185,7 @@ main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    if( precision == "all" || precision == "float" )
       runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow );
diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index 1aa28d6ea..90eafb6b9 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -163,7 +163,7 @@ Benchmark< Logger >::
 time( ResetFunction reset,
       const String & performer,
       ComputeFunction & compute,
-      BenchmarkResult< Logger > & result )
+      BenchmarkResult & result )
 {
    result.time = std::numeric_limits<double>::quiet_NaN();
    result.stddev = std::numeric_limits<double>::quiet_NaN();
@@ -210,7 +210,7 @@ time( ResetFunction reset,
       const String& performer,
       ComputeFunction& compute )
 {
-   BenchmarkResult< Logger > result;
+   BenchmarkResult result;
    return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
 }
 
@@ -221,7 +221,7 @@ double
 Benchmark< Logger >::
 time( const String & performer,
       ComputeFunction & compute,
-      BenchmarkResult< Logger > & result )
+      BenchmarkResult & result )
 {
    result.time = std::numeric_limits<double>::quiet_NaN();
    result.stddev = std::numeric_limits<double>::quiet_NaN();
@@ -259,7 +259,7 @@ Benchmark< Logger >::
 time( const String & performer,
       ComputeFunction & compute )
 {
-   BenchmarkResult< Logger > result;
+   BenchmarkResult result;
    return time< Device, ComputeFunction >( performer, compute, result );
 }
 
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 5701cf747..5c59df58c 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -13,7 +13,7 @@
 
 #pragma once
 
-#include "Logging.h"
+#include "CustomLogging.h"
 
 #include <limits>
 
@@ -32,11 +32,10 @@ namespace Benchmarks {
 const double oneGB = 1024.0 * 1024.0 * 1024.0;
 
 
-template< typename Logger = Logging >
 struct BenchmarkResult
 {
-   using HeaderElements = typename Logger::HeaderElements;
-   using RowElements = typename Logger::RowElements;
+   using HeaderElements = typename Logging::HeaderElements;
+   using RowElements = typename Logging::RowElements;
 
    double time = std::numeric_limits<double>::quiet_NaN();
    double stddev = std::numeric_limits<double>::quiet_NaN();
@@ -65,7 +64,7 @@ struct BenchmarkResult
    }
 };
 
-template< typename Logger = Logging >
+template< typename Logger = CustomLogging >
 class Benchmark
 : protected Logger
 {
@@ -139,7 +138,7 @@ class Benchmark
       double time( ResetFunction reset,
                   const String & performer,
                   ComputeFunction & compute,
-                  BenchmarkResult< Logger > & result );
+                  BenchmarkResult & result );
 
       template< typename Device,
                typename ResetFunction,
@@ -148,7 +147,7 @@ class Benchmark
                         const String & performer,
                         ComputeFunction & compute );
       /*{
-         BenchmarkResult< Logger > result;
+         BenchmarkResult result;
          return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
       }*/
 
@@ -159,7 +158,7 @@ class Benchmark
                typename ComputeFunction >
       double time( const String & performer,
                   ComputeFunction & compute,
-                  BenchmarkResult< Logger > & result );
+                  BenchmarkResult & result );
 
       template< typename Device,
                typename ComputeFunction >
@@ -195,8 +194,7 @@ class Benchmark
 };
 
 
-template< typename Logger >
-inline typename Benchmark< Logger >::MetadataMap getHardwareMetadata()
+inline typename Logging::MetadataMap getHardwareMetadata()
 {
    const int cpu_id = 0;
    const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id );
@@ -218,7 +216,7 @@ inline typename Benchmark< Logger >::MetadataMap getHardwareMetadata()
       nproc = TNL::MPI::GetSize();
 #endif
 
-   typename Benchmark< Logger >::MetadataMap metadata {
+   typename Logging::MetadataMap metadata {
        { "host name", SystemInfo::getHostname() },
        { "architecture", SystemInfo::getArchitecture() },
        { "system", SystemInfo::getSystemName() },
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 288439a26..3ea1dbd2b 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -13,6 +13,7 @@ set( headers
          Benchmark.hpp
          FunctionTimer.h
          Logging.h
+         CustomLogging.h
          JsonLogging.h
 )
 
diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
new file mode 100644
index 000000000..51c291ea9
--- /dev/null
+++ b/src/Benchmarks/CustomLogging.h
@@ -0,0 +1,229 @@
+/***************************************************************************
+                          CustomLogging.h  -  description
+                             -------------------
+    begin                : May 11, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky,
+//                 Tomas Oberhuber
+
+#pragma once
+
+#include "Logging.h"
+
+namespace TNL {
+namespace Benchmarks {
+
+class CustomLogging
+: public Logging
+{
+public:
+   CustomLogging( int verbose = true,            // forwarded to the Logging base class
+                  String outputMode = "",        // copied into the outputMode member
+                  bool logFileAppend = false )   // accepted, but not used by this constructor
+   : Logging(verbose), outputMode( outputMode )
+   {}
+
+   // Records the title in the log as ": title = ..." and, in verbose mode, prints it to stdout as "== title ==".
+   virtual void writeTitle( const String & title ) override
+   {
+      log << ": title = " << title << std::endl;
+      if( verbose )
+         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
+   }
+
+   // Prints every (name, value) pair as "name = value" on stdout in verbose mode; the log stream is not touched.
+   virtual void addCommonLogs( const CommonLogs& logs ) override
+   {
+      if( ! verbose )
+         return;
+      for( const auto & entry : logs )  // bound by reference and named so that it does not shadow the `log` member
+         std::cout << entry.first << " = " << entry.second << std::endl;
+   }
+
+   // Logs every metadata entry as ": key = value"; in verbose mode also lists them on stdout under "properties:".
+   virtual void writeMetadata( const MetadataMap & metadata ) override
+   {
+      const bool echo = verbose;
+      if( echo )
+         std::cout << "properties:" << std::endl;
+      for( const auto & entry : metadata ) {
+         log << ": " << entry.first << " = " << entry.second << std::endl;
+         if( echo )
+            std::cout << "   " << entry.first << " = " << entry.second << std::endl;
+      }
+      if( echo )
+         std::cout << std::endl;
+   }
+
+   // Prints the column captions to stdout (verbose mode, only while header_changed is set) and writes the "!"-indented header to the log.
+   virtual void writeTableHeader( const String & spanningElement,
+                                  const HeaderElements & subElements ) override
+   {
+      if( verbose && header_changed ) {
+         for( auto & it : metadataColumns ) {
+            std::cout << std::setw( 20 ) << it.first;
+         }
+
+         // spanning element is printed as usual column to stdout,
+         // but is excluded from header
+         std::cout << std::setw( 15 ) << "";
+
+         for( auto & it : subElements ) {
+            std::cout << std::setw( 15 ) << it;
+         }
+         std::cout << std::endl;
+
+         header_changed = false;
+      }
+
+      // initial indent string
+      header_indent = "!";
+      log << std::endl;
+      for( auto & it : metadataColumns ) {
+         log << header_indent << " " << it.first << std::endl;
+      }
+
+      // dump stacked spanning columns; emptiness is re-tested on every pass because
+      // back() on an empty vector and pop_back() on an empty string are undefined behaviour
+      while( ! horizontalGroups.empty() && horizontalGroups.back().second <= 0 ) {
+         horizontalGroups.pop_back();
+         if( ! header_indent.empty() ) header_indent.pop_back();
+      }
+      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
+         if( horizontalGroups[ i ].second > 0 ) {
+            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
+            header_indent += "!";
+         }
+      }
+
+      log << header_indent << " " << spanningElement << std::endl;
+      for( auto & it : subElements ) {
+         log << header_indent << "! " << it << std::endl;
+      }
+
+      if( horizontalGroups.size() > 0 ) {
+         horizontalGroups.back().second--;
+         header_indent.pop_back();
+      }
+   }
+
+   // Appends one result row: metadata values and the sub-elements go to the log,
+   // one per line; in verbose mode the row is also printed to stdout in columns.
+   virtual void writeTableRow( const String & spanningElement,
+                               const RowElements & subElements ) override
+   {
+      // log part: metadata values first, each on its own line
+      // (the header has been already adjusted)
+      for( const auto & column : metadataColumns )
+         log << column.second << std::endl;
+
+      // log part: benchmark data are indented
+      const String dataIndent = "    ";
+      for( const auto & element : subElements )
+         log << dataIndent << element << std::endl;
+
+      if( ! verbose )
+         return;
+
+      // stdout part: metadata values in 20-character columns
+      for( const auto & column : metadataColumns )
+         std::cout << std::setw( 20 ) << column.second;
+
+      // spanning element is printed as usual column to stdout
+      std::cout << std::setw( 15 ) << spanningElement;
+      for( const auto & element : subElements )
+         std::cout << std::setw( 15 ) << element;
+      std::cout << std::endl;
+   }
+
+   // Logs an error message in place of a result row and subtracts `colspan` from the counter of the last horizontal group.
+   virtual void writeErrorMessage( const char* msg,
+                                   int colspan = 1 ) override
+   {
+      // initial indent string
+      header_indent = "!";
+      log << std::endl;
+      for( auto & it : metadataColumns ) {
+         log << header_indent << " " << it.first << std::endl;
+      }
+
+      // make sure there is a header column for the message
+      if( horizontalGroups.size() == 0 )
+         horizontalGroups.push_back( {"", 1} );
+
+      // dump stacked spanning columns (emptiness re-tested on every pass: back()/pop_back() on empty containers are UB)
+      while( ! horizontalGroups.empty() && horizontalGroups.back().second <= 0 ) {
+         horizontalGroups.pop_back();
+         if( ! header_indent.empty() ) header_indent.pop_back();
+      }
+      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
+         if( horizontalGroups[ i ].second > 0 ) {
+            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
+            header_indent += "!";
+         }
+      }
+      if( horizontalGroups.size() > 0 ) {
+         horizontalGroups.back().second -= colspan;
+         header_indent.pop_back();
+      }
+
+      // only when changed (the header has been already adjusted)
+      // print each element on separate line
+      for( auto & it : metadataColumns ) {
+         log << it.second << std::endl;
+      }
+      log << msg << std::endl;
+   }
+
+   virtual void closeTable() override  // ends the table: newline to the log, header state reset
+   {
+      log << std::endl;
+      horizontalGroups.clear();
+      header_indent.clear();
+      body_indent.clear();
+      header_changed = true;
+   }
+
+   // Closes the table and writes the accumulated log text to logFile; returns true (and empties the buffer) if the stream is still good.
+   virtual bool save( std::ostream & logFile ) override
+   {
+      closeTable();
+      logFile << log.str();
+      if( ! logFile.good() )
+         return false;
+      log.str( "" );  // str() returns a copy, so the former `log.str() = ""` never emptied the buffer
+      return true;
+   }
+
+protected:
+   // Formats a double as a String: `fixed` selects std::fixed, a non-zero `precision` is applied with std::setprecision.
+   static String _to_string( double num, int precision = 0, bool fixed = false )
+   {
+      std::stringstream buffer;
+      if( fixed )
+         buffer << std::fixed;
+      if( precision != 0 )
+         buffer << std::setprecision( precision );
+      buffer << num;
+      const std::string text = buffer.str();
+      return String( text.data() );
+   }
+
+   std::stringstream log;
+   std::string header_indent;
+   std::string body_indent;
+
+   MetadataColumns metadataColumns;
+   bool header_changed = true;
+   std::vector< std::pair< String, int > > horizontalGroups;
+
+   String outputMode;
+};
+
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index b79d80ebf..e43c509f5 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -337,7 +337,7 @@ main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 431daf90b..9a1fac592 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -19,32 +19,16 @@ namespace TNL {
 namespace Benchmarks {
 
 class JsonLogging
+: public Logging
 {
 public:
-   using MetadataElement = std::pair< const char*, String >;
-   using MetadataMap = std::map< const char*, String >;
-   using MetadataColumns = std::vector<MetadataElement>;
-
-   using HeaderElements = std::vector< String >;
-   using RowElements = LoggingRowElements;
-
-   using CommonLogs = std::vector< std::pair< const char*, String > >;
-   using LogsMetadata = HeaderElements;
-   using WidthHints = std::vector< int >;
-
    JsonLogging( int verbose = true,
                 String outputMode = "",
                 bool logFileAppend = false )
-   : verbose(verbose), outputMode( outputMode ), logFileAppend( logFileAppend )
+   : Logging(verbose), outputMode( outputMode ), logFileAppend( logFileAppend )
    {}
 
-   void
-   setVerbose( int verbose)
-   {
-      this->verbose = verbose;
-   }
-
-   void addCommonLogs( const CommonLogs& logs )
+   virtual void addCommonLogs( const CommonLogs& logs ) override
    {
       this->commonLogs = logs;
       if( verbose )
@@ -56,19 +40,19 @@ public:
       }
    };
 
-   void resetLogsMetada()
+   virtual void resetLogsMetada() override
    {
       this->logsMetadata.clear();
       this->widthHints.clear();
    }
 
-   void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths )
+   virtual void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths ) override
    {
       this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() );
       this->widthHints.insert( this->widthHints.end(), widths.begin(), widths.end() );
    }
 
-   void writeHeader()
+   virtual void writeHeader() override
    {
       TNL_ASSERT_EQ( this->logsMetadata.size(), this->widthHints.size(), "" );
       if( verbose )
@@ -112,8 +96,8 @@ public:
          std::cout << std::endl;
    }
 
-   void
-   writeTitle( const String & title )
+   virtual void
+   writeTitle( const String & title ) override
    {
       if( outputMode == "append" )
          return;
@@ -122,8 +106,8 @@ public:
          std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
    }
 
-   void
-   writeMetadata( const MetadataMap & metadata )
+   virtual void
+   writeMetadata( const MetadataMap & metadata ) override
    {
       if( outputMode == "append" )
          return;
@@ -140,32 +124,32 @@ public:
          std::cout << std::endl;
    }
 
-   void
+   virtual void
    writeTableHeader( const String & spanningElement,
-                     const HeaderElements & subElements )
+                     const HeaderElements & subElements ) override
    {
    }
 
-   void
+   virtual void
    writeTableRow( const String & spanningElement,
-                  const RowElements & subElements )
+                  const RowElements & subElements ) override
    {
       writeRow( subElements );
    }
 
-   void
+   virtual void
    writeErrorMessage( const char* msg,
-                      int colspan = 1 )
+                      int colspan = 1 ) override
    {
       log << "\"error\" : \"" << msg << "\"" << std::endl;
    }
 
-   void
-   closeTable()
+   virtual void
+   closeTable() override
    {
    }
 
-   bool save( std::ostream & logFile )
+   virtual bool save( std::ostream & logFile ) override
    {
       if( ! this->logFileAppend )
       {
@@ -199,7 +183,6 @@ protected:
    std::string header_indent;
    std::string body_indent;
 
-   int verbose;
    MetadataColumns metadataColumns;
    bool header_changed = true;
    std::vector< std::pair< String, int > > horizontalGroups;
diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h
index 899ccbd48..2d2fe825a 100644
--- a/src/Benchmarks/LinearSolvers/benchmarks.h
+++ b/src/Benchmarks/LinearSolvers/benchmarks.h
@@ -126,7 +126,7 @@ benchmarkSolver( Benchmark<>& benchmark,
 
    // subclass BenchmarkResult to add extra columns to the benchmark
    // (iterations, preconditioned residue, true residue)
-   struct MyBenchmarkResult : public BenchmarkResult<>
+   struct MyBenchmarkResult : public BenchmarkResult
    {
       using HeaderElements = BenchmarkResult::HeaderElements;
       using RowElements = BenchmarkResult::RowElements;
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 0c1651320..53175efe8 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -617,7 +617,7 @@ main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 72445b4e0..44ede7277 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -88,11 +88,11 @@ public:
    using RowElements = LoggingRowElements;
 
    using CommonLogs = std::vector< std::pair< const char*, String > >;
+   using LogsMetadata = HeaderElements;
+   using WidthHints = std::vector< int >;
 
-   Logging( int verbose = true,
-            String outputMode = "",
-            bool logFileAppend = false )
-   : verbose(verbose), outputMode( outputMode )
+   Logging( int verbose = true )
+   : verbose(verbose)
    {}
 
    void
@@ -101,206 +101,36 @@ public:
       this->verbose = verbose;
    }
 
-   void
-   writeTitle( const String & title )
-   {
-      if( verbose )
-         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
-      log << ": title = " << title << std::endl;
-   }
+   virtual void writeTitle( const String & title ) = 0;
 
-   void addCommonLogs( const CommonLogs& logs )
-   {
-      for( auto log : logs )
-      {
-         if( verbose )
-            std::cout << log.first << " = " << log.second << std::endl;
-      }
-   };
+   virtual void addCommonLogs( const CommonLogs& logs ) = 0;
 
-   void addLogsMetadata( const std::vector< String >& md ){};
+   virtual void resetLogsMetada() {}
 
-   void writeHeader(){};
+   virtual void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths ) {}
 
-   void
-   writeMetadata( const MetadataMap & metadata )
-   {
-      if( verbose )
-         std::cout << "properties:" << std::endl;
+   virtual void writeHeader() {}
 
-      for( auto & it : metadata ) {
-         if( verbose )
-            std::cout << "   " << it.first << " = " << it.second << std::endl;
-         log << ": " << it.first << " = " << it.second << std::endl;
-      }
-      if( verbose )
-         std::cout << std::endl;
-   }
+   virtual void writeMetadata( const MetadataMap & metadata ) {}
 
-   void
+   virtual void
    writeTableHeader( const String & spanningElement,
-                     const HeaderElements & subElements )
-   {
-      if( verbose && header_changed ) {
-         for( auto & it : metadataColumns ) {
-            std::cout << std::setw( 20 ) << it.first;
-         }
-
-         // spanning element is printed as usual column to stdout,
-         // but is excluded from header
-         std::cout << std::setw( 15 ) << "";
-
-         for( auto & it : subElements ) {
-            std::cout << std::setw( 15 ) << it;
-         }
-         std::cout << std::endl;
+                     const HeaderElements & subElements ) = 0;
 
-         header_changed = false;
-      }
-
-      // initial indent string
-      header_indent = "!";
-      log << std::endl;
-      for( auto & it : metadataColumns ) {
-         log << header_indent << " " << it.first << std::endl;
-      }
-
-      // dump stacked spanning columns
-      if( horizontalGroups.size() > 0 )
-         while( horizontalGroups.back().second <= 0 ) {
-            horizontalGroups.pop_back();
-            header_indent.pop_back();
-         }
-      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-         if( horizontalGroups[ i ].second > 0 ) {
-            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-            header_indent += "!";
-         }
-      }
-
-      log << header_indent << " " << spanningElement << std::endl;
-      for( auto & it : subElements ) {
-         log << header_indent << "! " << it << std::endl;
-      }
-
-      if( horizontalGroups.size() > 0 ) {
-         horizontalGroups.back().second--;
-         header_indent.pop_back();
-      }
-   }
-
-   void
+   virtual void
    writeTableRow( const String & spanningElement,
-                  const RowElements & subElements )
-   {
-      if( verbose ) {
-         for( auto & it : metadataColumns ) {
-            std::cout << std::setw( 20 ) << it.second;
-         }
-         // spanning element is printed as usual column to stdout
-         std::cout << std::setw( 15 ) << spanningElement;
-         for( auto & it : subElements ) {
-            std::cout << std::setw( 15 ) << it;
-         }
-         std::cout << std::endl;
-      }
+                  const RowElements & subElements ) = 0;
 
-      // only when changed (the header has been already adjusted)
-      // print each element on separate line
-      for( auto & it : metadataColumns ) {
-         log << it.second << std::endl;
-      }
-
-      // benchmark data are indented
-      const String indent = "    ";
-      for( auto & it : subElements ) {
-         log << indent << it << std::endl;
-      }
-   }
-
-   void
+   virtual void
    writeErrorMessage( const char* msg,
-                      int colspan = 1 )
-   {
-      // initial indent string
-      header_indent = "!";
-      log << std::endl;
-      for( auto & it : metadataColumns ) {
-         log << header_indent << " " << it.first << std::endl;
-      }
+                      int colspan = 1 ) = 0;
 
-      // make sure there is a header column for the message
-      if( horizontalGroups.size() == 0 )
-         horizontalGroups.push_back( {"", 1} );
+   virtual void closeTable() = 0;
 
-      // dump stacked spanning columns
-      while( horizontalGroups.back().second <= 0 ) {
-         horizontalGroups.pop_back();
-         header_indent.pop_back();
-      }
-      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-         if( horizontalGroups[ i ].second > 0 ) {
-            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-            header_indent += "!";
-         }
-      }
-      if( horizontalGroups.size() > 0 ) {
-         horizontalGroups.back().second -= colspan;
-         header_indent.pop_back();
-      }
-
-      // only when changed (the header has been already adjusted)
-      // print each element on separate line
-      for( auto & it : metadataColumns ) {
-         log << it.second << std::endl;
-      }
-      log << msg << std::endl;
-   }
-
-   void
-   closeTable()
-   {
-      log << std::endl;
-      header_indent = body_indent = "";
-      header_changed = true;
-      horizontalGroups.clear();
-   }
-
-   bool save( std::ostream & logFile )
-   {
-      closeTable();
-      logFile << log.str();
-      if( logFile.good() ) {
-         log.str() = "";
-         return true;
-      }
-      return false;
-   }
+   virtual bool save( std::ostream & logFile ) = 0;
 
 protected:
-   // manual double -> String conversion with fixed precision
-   static String
-   _to_string( double num, int precision = 0, bool fixed = false )
-   {
-      std::stringstream str;
-      if( fixed )
-         str << std::fixed;
-      if( precision )
-         str << std::setprecision( precision );
-      str << num;
-      return String( str.str().data() );
-   }
-
-   std::stringstream log;
-   std::string header_indent;
-   std::string body_indent;
-
-   int verbose;
-   MetadataColumns metadataColumns;
-   bool header_changed = true;
-   std::vector< std::pair< String, int > > horizontalGroups;
-
-   String outputMode;
+   int verbose = 0;
 };
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
index f7a485aa1..9400a473f 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
@@ -446,7 +446,7 @@ int main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    const String devices = parameters.getParameter< String >( "devices" );
    if( devices == "all" || devices == "host" )
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
index 8d4ac8e7a..094fbd1af 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
@@ -434,7 +434,7 @@ int main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    const String devices = parameters.getParameter< String >( "devices" );
    if( devices == "all" || devices == "host" )
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index afdf33d3a..3f091438f 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -248,7 +248,7 @@ main( int argc, char* argv[] )
    Benchmark<> benchmark( loops, verbose );
 
    // prepare global metadata
-   Benchmark<>::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    const bool status = resolveRealTypes( benchmark, metadata, parameters );
 
diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 9475c9ba5..f04d197ea 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -21,7 +21,7 @@ template< typename Real,
           typename ResultReal = Real,
           typename Logger = JsonLogging >
 struct SpmvBenchmarkResult
-: public BenchmarkResult< Logger >
+: public BenchmarkResult
 {
    using RealType = Real;
    using DeviceType = Device;
@@ -29,12 +29,12 @@ struct SpmvBenchmarkResult
    using HostVector = Containers::Vector< Real, Devices::Host, Index >;
    using BenchmarkVector = Containers::Vector< ResultReal, Device, Index >;
 
-   using typename BenchmarkResult< Logger >::HeaderElements;
-   using typename BenchmarkResult< Logger >::RowElements;
-   using BenchmarkResult< Logger >::stddev;
-   using BenchmarkResult< Logger >::bandwidth;
-   using BenchmarkResult< Logger >::speedup;
-   using BenchmarkResult< Logger >::time;
+   using typename BenchmarkResult::HeaderElements;
+   using typename BenchmarkResult::RowElements;
+   using BenchmarkResult::stddev;
+   using BenchmarkResult::bandwidth;
+   using BenchmarkResult::speedup;
+   using BenchmarkResult::time;
 
 
    SpmvBenchmarkResult( const String& format,
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index c5ff2bb3f..48349ff73 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -166,7 +166,7 @@ main( int argc, char* argv[] )
    TNL::Benchmarks::SpMV::BenchmarkType benchmark( loops, verbose, outputMode, logFileAppend );
 
    // prepare global metadata
-   TNL::Benchmarks::SpMV::BenchmarkType::MetadataMap metadata = getHardwareMetadata< Logging >();
+   Logging::MetadataMap metadata = getHardwareMetadata();
 
    // Initiate setup of benchmarks
    if( precision == "all" || precision == "float" )
-- 
GitLab


From 0511eb744be27dfc0a968c6793cac74d2a3c9945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 13:56:07 +0100
Subject: [PATCH 05/40] Benchmarks: removed horizontal groups to bring the
 custom format closer to JSON

---
 src/Benchmarks/BLAS/dense-mv.h |  8 ++----
 src/Benchmarks/BLAS/spmv.h     |  8 ++----
 src/Benchmarks/Benchmark.hpp   | 49 ++++++++++++++++------------------
 src/Benchmarks/Benchmarks.h    | 20 +++++---------
 src/Benchmarks/CustomLogging.h | 43 +----------------------------
 src/Benchmarks/JsonLogging.h   |  4 +--
 src/Benchmarks/Logging.h       |  3 +--
 7 files changed, 37 insertions(+), 98 deletions(-)

diff --git a/src/Benchmarks/BLAS/dense-mv.h b/src/Benchmarks/BLAS/dense-mv.h
index 1204257cc..1a532fbec 100644
--- a/src/Benchmarks/BLAS/dense-mv.h
+++ b/src/Benchmarks/BLAS/dense-mv.h
@@ -50,13 +50,9 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    HostVector inHostVector, outHostVector;
    CudaVector inCudaVector, outCudaVector1, outCudaVector2;
 
-   // create benchmark group
+   // set metadata
    const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() );
-#ifdef HAVE_CUDA
-   benchmark.createHorizontalGroup( parsedType[ 0 ], 2 );
-#else
-   benchmark.createHorizontalGroup( parsedType[ 0 ], 1 );
-#endif
+   benchmark.setMetadataElement({ "format", parsedType[ 0 ] });
 
    hostMatrix.setDimensions( size, size );
    inHostVector.setSize( size );
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index 6cd669dc0..d14343153 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -113,13 +113,9 @@ benchmarkSpMV( Benchmark<> & benchmark,
    HostVector hostVector, hostVector2;
    CudaVector deviceVector, deviceVector2;
 
-   // create benchmark group
+   // set metadata
    const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() );
-#ifdef HAVE_CUDA
-   benchmark.createHorizontalGroup( parsedType[ 0 ], 2 );
-#else
-   benchmark.createHorizontalGroup( parsedType[ 0 ], 1 );
-#endif
+   benchmark.setMetadataElement({ "format", parsedType[ 0 ] });
 
    hostRowLengths.setSize( size );
    hostMatrix.setDimensions( size, size );
diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index 90eafb6b9..914d567ae 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -105,6 +105,27 @@ setMetadataColumns( const MetadataColumns & metadata )
    Logger::metadataColumns = metadata;
 }
 
+template< typename Logger >
+void
+Benchmark< Logger >::
+setMetadataElement( const typename MetadataColumns::value_type & element )
+{
+   bool found = false;
+   for( auto & it : Logger::metadataColumns )
+      if( it.first == element.first ) {
+         if( it.second != element.second ) {
+            it.second = element.second;
+            Logger::header_changed = true;
+         }
+         found = true;
+         break;
+      }
+   if( ! found ) {
+      Logger::metadataColumns.push_back( element );
+      Logger::header_changed = true;
+   }
+}
+
 template< typename Logger >
 void
 Benchmark< Logger >::
@@ -133,27 +154,6 @@ setOperation( const double datasetSize,
    this->baseTime = baseTime;
 }
 
-template< typename Logger >
-void
-Benchmark< Logger >::
-createHorizontalGroup( const String & name,
-                       int subcolumns )
-{
-   if( Logger::horizontalGroups.size() == 0 ) {
-      Logger::horizontalGroups.push_back( {name, subcolumns} );
-   }
-   else {
-      auto & last = Logger::horizontalGroups.back();
-      if( last.first != name && last.second > 0 ) {
-         Logger::horizontalGroups.push_back( {name, subcolumns} );
-      }
-      else {
-         last.first = name;
-         last.second = subcolumns;
-      }
-   }
-}
-
 template< typename Logger >
    template< typename Device,
              typename ResetFunction,
@@ -266,12 +266,9 @@ time( const String & performer,
 template< typename Logger >
 void
 Benchmark< Logger >::
-addErrorMessage( const char* msg,
-                 int numberOfComputations )
+addErrorMessage( const char* msg )
 {
-   // each computation has 3 subcolumns
-   const int colspan = 3 * numberOfComputations;
-   Logger::writeErrorMessage( msg, colspan );
+   Logger::writeErrorMessage( msg );
    std::cerr << msg << std::endl;
 }
 
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 5c59df58c..ec39b88b2 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -105,7 +105,10 @@ class Benchmark
       // the next call to this function.
       void setMetadataColumns( const MetadataColumns & metadata );
 
-      // TODO: maybe should be renamed to createVerticalGroup and ensured that vertical and horizontal groups are not used within the same "Benchmark"
+      // Sets the value of one metadata column -- useful for iteratively
+      // changing MetadataColumns that were set using the previous method.
+      void setMetadataElement( const typename MetadataColumns::value_type & element );
+
       // Sets current operation -- operations expand the table vertically
       //  - baseTime should be reset to 0.0 for most operations, but sometimes
       //    it is useful to override it
@@ -119,17 +122,9 @@ class Benchmark
       void setOperation( const double datasetSize = 0.0,
                         const double baseTime = 0.0 );
 
-      // Creates new horizontal groups inside a benchmark -- increases the number
-      // of columns in the "Benchmark", implies column spanning.
-      // (Useful e.g. for SpMV formats, different configurations etc.)
-      void
-      createHorizontalGroup( const String & name,
-                           int subcolumns );
-
       // Times a single ComputeFunction. Subsequent calls implicitly split
-      // the current "horizontal group" into sub-columns identified by
-      // "performer", which are further split into "bandwidth", "time" and
-      // "speedup" columns.
+      // the current operation into sub-columns identified by "performer",
+      // which are further split into "bandwidth", "time" and "speedup" columns.
       // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation)
       // Also terminates the recursion of the following variadic template.
       template< typename Device,
@@ -167,8 +162,7 @@ class Benchmark
 
       // Adds an error message to the log. Should be called in places where the
       // "time" method could not be called (e.g. due to failed allocation).
-      void addErrorMessage( const char* msg,
-                           int numberOfComputations = 1 );
+      void addErrorMessage( const char* msg );
 
       using Logger::save;
 
diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index 51c291ea9..ca421fe5c 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -88,28 +88,10 @@ public:
          log << header_indent << " " << it.first << std::endl;
       }
 
-      // dump stacked spanning columns
-      if( horizontalGroups.size() > 0 )
-         while( horizontalGroups.back().second <= 0 ) {
-            horizontalGroups.pop_back();
-            header_indent.pop_back();
-         }
-      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-         if( horizontalGroups[ i ].second > 0 ) {
-            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-            header_indent += "!";
-         }
-      }
-
       log << header_indent << " " << spanningElement << std::endl;
       for( auto & it : subElements ) {
          log << header_indent << "! " << it << std::endl;
       }
-
-      if( horizontalGroups.size() > 0 ) {
-         horizontalGroups.back().second--;
-         header_indent.pop_back();
-      }
    }
 
    virtual void
@@ -142,8 +124,7 @@ public:
    }
 
    virtual void
-   writeErrorMessage( const char* msg,
-                      int colspan = 1 ) override
+   writeErrorMessage( const char* msg ) override
    {
       // initial indent string
       header_indent = "!";
@@ -152,26 +133,6 @@ public:
          log << header_indent << " " << it.first << std::endl;
       }
 
-      // make sure there is a header column for the message
-      if( horizontalGroups.size() == 0 )
-         horizontalGroups.push_back( {"", 1} );
-
-      // dump stacked spanning columns
-      while( horizontalGroups.back().second <= 0 ) {
-         horizontalGroups.pop_back();
-         header_indent.pop_back();
-      }
-      for( size_t i = 0; i < horizontalGroups.size(); i++ ) {
-         if( horizontalGroups[ i ].second > 0 ) {
-            log << header_indent << " " << horizontalGroups[ i ].first << std::endl;
-            header_indent += "!";
-         }
-      }
-      if( horizontalGroups.size() > 0 ) {
-         horizontalGroups.back().second -= colspan;
-         header_indent.pop_back();
-      }
-
       // only when changed (the header has been already adjusted)
       // print each element on separate line
       for( auto & it : metadataColumns ) {
@@ -186,7 +147,6 @@ public:
       log << std::endl;
       header_indent = body_indent = "";
       header_changed = true;
-      horizontalGroups.clear();
    }
 
    virtual bool save( std::ostream & logFile ) override
@@ -220,7 +180,6 @@ protected:
 
    MetadataColumns metadataColumns;
    bool header_changed = true;
-   std::vector< std::pair< String, int > > horizontalGroups;
 
    String outputMode;
 };
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 9a1fac592..7e1807fdd 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -138,8 +138,7 @@ public:
    }
 
    virtual void
-   writeErrorMessage( const char* msg,
-                      int colspan = 1 ) override
+   writeErrorMessage( const char* msg ) override
    {
       log << "\"error\" : \"" << msg << "\"" << std::endl;
    }
@@ -185,7 +184,6 @@ protected:
 
    MetadataColumns metadataColumns;
    bool header_changed = true;
-   std::vector< std::pair< String, int > > horizontalGroups;
 
    // new JSON implementation
    LogsMetadata logsMetadata;
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 44ede7277..b7002bcbc 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -122,8 +122,7 @@ public:
                   const RowElements & subElements ) = 0;
 
    virtual void
-   writeErrorMessage( const char* msg,
-                      int colspan = 1 ) = 0;
+   writeErrorMessage( const char* msg ) = 0;
 
    virtual void closeTable() = 0;
 
-- 
GitLab


From d416a5980038adffea1767c84df688143aba7d49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 14:02:20 +0100
Subject: [PATCH 06/40] Benchmarks: cleaned up indenting in CustomLogging

---
 src/Benchmarks/CustomLogging.h | 13 ++++---------
 src/Benchmarks/JsonLogging.h   |  5 -----
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index ca421fe5c..e67daab7d 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -82,15 +82,14 @@ public:
       }
 
       // initial indent string
-      header_indent = "!";
       log << std::endl;
       for( auto & it : metadataColumns ) {
-         log << header_indent << " " << it.first << std::endl;
+         log << "! " << it.first << std::endl;
       }
 
-      log << header_indent << " " << spanningElement << std::endl;
+      log << "! " << spanningElement << std::endl;
       for( auto & it : subElements ) {
-         log << header_indent << "! " << it << std::endl;
+         log << "!! " << it << std::endl;
       }
    }
 
@@ -127,10 +126,9 @@ public:
    writeErrorMessage( const char* msg ) override
    {
       // initial indent string
-      header_indent = "!";
       log << std::endl;
       for( auto & it : metadataColumns ) {
-         log << header_indent << " " << it.first << std::endl;
+         log << "! " << it.first << std::endl;
       }
 
       // only when changed (the header has been already adjusted)
@@ -145,7 +143,6 @@ public:
    closeTable() override
    {
       log << std::endl;
-      header_indent = body_indent = "";
       header_changed = true;
    }
 
@@ -175,8 +172,6 @@ protected:
    }
 
    std::stringstream log;
-   std::string header_indent;
-   std::string body_indent;
 
    MetadataColumns metadataColumns;
    bool header_changed = true;
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 7e1807fdd..590c18328 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -179,11 +179,6 @@ protected:
    }
 
    std::stringstream log;
-   std::string header_indent;
-   std::string body_indent;
-
-   MetadataColumns metadataColumns;
-   bool header_changed = true;
 
    // new JSON implementation
    LogsMetadata logsMetadata;
-- 
GitLab


From 5abc0c5c3d96a6f0b27da6dfd6044a77ed971ee7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 16:14:50 +0100
Subject: [PATCH 07/40] Benchmarks: refactored verbose flag

---
 src/Benchmarks/Benchmark.hpp   | 12 ++++++------
 src/Benchmarks/FunctionTimer.h |  7 ++-----
 src/Benchmarks/Logging.h       |  7 ++++++-
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index 914d567ae..f7ab5bf68 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -173,15 +173,15 @@ time( ResetFunction reset,
          // run the monitor main loop
          Solvers::SolverMonitorThread monitor_thread( monitor );
          if( this->reset )
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, Logger::verbose, monitor );
+            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
          else
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, Logger::verbose, monitor );
+            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
       }
       else {
          if( this->reset )
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, Logger::verbose, monitor );
+            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
          else
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, Logger::verbose, monitor );
+            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
       }
       this->performedLoops = functionTimer.getPerformedLoops();
    }
@@ -230,10 +230,10 @@ time( const String & performer,
       if( Logger::verbose > 1 ) {
          // run the monitor main loop
          Solvers::SolverMonitorThread monitor_thread( monitor );
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, Logger::verbose, monitor );
+         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
       }
       else {
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, Logger::verbose, monitor );
+         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
       }
    }
    catch ( const std::exception& e ) {
diff --git a/src/Benchmarks/FunctionTimer.h b/src/Benchmarks/FunctionTimer.h
index 1edd61204..bb7617c16 100644
--- a/src/Benchmarks/FunctionTimer.h
+++ b/src/Benchmarks/FunctionTimer.h
@@ -37,15 +37,13 @@ public:
                  ResetFunction reset,
                  int maxLoops,
                  const double& minTime,
-                 int verbose = 1,
                  Monitor && monitor = Monitor() )
    {
       // the timer is constructed zero-initialized and stopped
       Timer timer;
 
       // set timer to the monitor
-      if( verbose > 1 )
-         monitor.setTimer( timer );
+      monitor.setTimer( timer );
 
       // warm up
       reset();
@@ -100,11 +98,10 @@ public:
    timeFunction( ComputeFunction compute,
                  int maxLoops,
                  const double& minTime,
-                 int verbose = 1,
                  Monitor && monitor = Monitor() )
    {
       auto noReset = [] () {};
-      return timeFunction( compute, noReset, maxLoops, minTime, verbose, monitor );
+      return timeFunction( compute, noReset, maxLoops, minTime, monitor );
    }
 
    int getPerformedLoops() const
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index b7002bcbc..0de1e8418 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -96,11 +96,16 @@ public:
    {}
 
    void
-   setVerbose( int verbose)
+   setVerbose( int verbose )
    {
       this->verbose = verbose;
    }
 
+   int getVerbose() const
+   {
+      return verbose;
+   }
+
    virtual void writeTitle( const String & title ) = 0;
 
    virtual void addCommonLogs( const CommonLogs& logs ) = 0;
-- 
GitLab


From 683092bba9bb655776769b47dee570e6ee9869a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 16:24:00 +0100
Subject: [PATCH 08/40] Benchmarks: refactored string types in logging

---
 src/Benchmarks/CustomLogging.h | 24 +++++++++++++-----------
 src/Benchmarks/JsonLogging.h   | 20 ++++++++++----------
 src/Benchmarks/Logging.h       | 28 +++++++++++++---------------
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index e67daab7d..5ed8d7e89 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -23,13 +23,13 @@ class CustomLogging
 {
 public:
    CustomLogging( int verbose = true,
-                  String outputMode = "",
+                  std::string outputMode = "",
                   bool logFileAppend = false )
    : Logging(verbose), outputMode( outputMode )
    {}
 
    virtual void
-   writeTitle( const String & title ) override
+   writeTitle( const std::string & title ) override
    {
       if( verbose )
          std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
@@ -61,7 +61,7 @@ public:
    }
 
    virtual void
-   writeTableHeader( const String & spanningElement,
+   writeTableHeader( const std::string & spanningElement,
                      const HeaderElements & subElements ) override
    {
       if( verbose && header_changed ) {
@@ -94,7 +94,7 @@ public:
    }
 
    virtual void
-   writeTableRow( const String & spanningElement,
+   writeTableRow( const std::string & spanningElement,
                   const RowElements & subElements ) override
    {
       if( verbose ) {
@@ -116,14 +116,14 @@ public:
       }
 
       // benchmark data are indented
-      const String indent = "    ";
+      const std::string indent = "    ";
       for( auto & it : subElements ) {
          log << indent << it << std::endl;
       }
    }
 
    virtual void
-   writeErrorMessage( const char* msg ) override
+   writeErrorMessage( const std::string& message ) override
    {
       // initial indent string
       log << std::endl;
@@ -136,7 +136,9 @@ public:
       for( auto & it : metadataColumns ) {
          log << it.second << std::endl;
       }
-      log << msg << std::endl;
+
+      // write the message
+      log << message << std::endl;
    }
 
    virtual void
@@ -158,8 +160,8 @@ public:
    }
 
 protected:
-   // manual double -> String conversion with fixed precision
-   static String
+   // manual double -> string conversion with fixed precision
+   static std::string
    _to_string( double num, int precision = 0, bool fixed = false )
    {
       std::stringstream str;
@@ -168,7 +170,7 @@ protected:
       if( precision )
          str << std::setprecision( precision );
       str << num;
-      return String( str.str().data() );
+      return std::string( str.str().data() );
    }
 
    std::stringstream log;
@@ -176,7 +178,7 @@ protected:
    MetadataColumns metadataColumns;
    bool header_changed = true;
 
-   String outputMode;
+   std::string outputMode;
 };
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 590c18328..da9c9f43f 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -23,7 +23,7 @@ class JsonLogging
 {
 public:
    JsonLogging( int verbose = true,
-                String outputMode = "",
+                std::string outputMode = "",
                 bool logFileAppend = false )
    : Logging(verbose), outputMode( outputMode ), logFileAppend( logFileAppend )
    {}
@@ -97,7 +97,7 @@ public:
    }
 
    virtual void
-   writeTitle( const String & title ) override
+   writeTitle( const std::string & title ) override
    {
       if( outputMode == "append" )
          return;
@@ -125,22 +125,22 @@ public:
    }
 
    virtual void
-   writeTableHeader( const String & spanningElement,
+   writeTableHeader( const std::string & spanningElement,
                      const HeaderElements & subElements ) override
    {
    }
 
    virtual void
-   writeTableRow( const String & spanningElement,
+   writeTableRow( const std::string & spanningElement,
                   const RowElements & subElements ) override
    {
       writeRow( subElements );
    }
 
    virtual void
-   writeErrorMessage( const char* msg ) override
+   writeErrorMessage( const std::string& message ) override
    {
-      log << "\"error\" : \"" << msg << "\"" << std::endl;
+      log << "\"error\" : \"" << message << "\"" << std::endl;
    }
 
    virtual void
@@ -165,8 +165,8 @@ public:
    }
 
 protected:
-   // manual double -> String conversion with fixed precision
-   static String
+   // manual double -> string conversion with fixed precision
+   static std::string
    _to_string( double num, int precision = 0, bool fixed = false )
    {
       std::stringstream str;
@@ -175,7 +175,7 @@ protected:
       if( precision )
          str << std::setprecision( precision );
       str << num;
-      return String( str.str().data() );
+      return std::string( str.str().data() );
    }
 
    std::stringstream log;
@@ -184,7 +184,7 @@ protected:
    LogsMetadata logsMetadata;
    WidthHints widthHints;
    CommonLogs commonLogs;
-   String outputMode;
+   std::string outputMode;
 
    bool lineStarted = false;
    bool resultsStarted = false;
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 0de1e8418..977c96d2c 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -21,8 +21,6 @@
 #include <string>
 #include <sstream>
 
-#include <TNL/String.h>
-
 namespace TNL {
 namespace Benchmarks {
 
@@ -72,7 +70,7 @@ class LoggingRowElements
       auto cend() const noexcept { return elements.cend(); }
 
    protected:
-      std::list< String > elements;
+      std::list< std::string > elements;
 
       std::stringstream stream;
 };
@@ -80,14 +78,14 @@ class LoggingRowElements
 class Logging
 {
 public:
-   using MetadataElement = std::pair< const char*, String >;
-   using MetadataMap = std::map< const char*, String >;
-   using MetadataColumns = std::vector<MetadataElement>;
+   using MetadataElement = std::pair< std::string, std::string >;
+   using MetadataMap = std::map< std::string, std::string >;
+   using MetadataColumns = std::vector< MetadataElement >;
 
-   using HeaderElements = std::vector< String >;
+   using HeaderElements = std::vector< std::string >;
    using RowElements = LoggingRowElements;
 
-   using CommonLogs = std::vector< std::pair< const char*, String > >;
+   using CommonLogs = MetadataColumns;
    using LogsMetadata = HeaderElements;
    using WidthHints = std::vector< int >;
 
@@ -106,7 +104,7 @@ public:
       return verbose;
    }
 
-   virtual void writeTitle( const String & title ) = 0;
+   virtual void writeTitle( const std::string& title ) = 0;
 
    virtual void addCommonLogs( const CommonLogs& logs ) = 0;
 
@@ -119,19 +117,19 @@ public:
    virtual void writeMetadata( const MetadataMap & metadata ) {}
 
    virtual void
-   writeTableHeader( const String & spanningElement,
-                     const HeaderElements & subElements ) = 0;
+   writeTableHeader( const std::string& spanningElement,
+                     const HeaderElements& subElements ) = 0;
 
    virtual void
-   writeTableRow( const String & spanningElement,
-                  const RowElements & subElements ) = 0;
+   writeTableRow( const std::string& spanningElement,
+                  const RowElements& subElements ) = 0;
 
    virtual void
-   writeErrorMessage( const char* msg ) = 0;
+   writeErrorMessage( const std::string& message ) = 0;
 
    virtual void closeTable() = 0;
 
-   virtual bool save( std::ostream & logFile ) = 0;
+   virtual bool save( std::ostream& logFile ) = 0;
 
 protected:
    int verbose = 0;
-- 
GitLab


From 122840c0f0a97f7683bd59ec14c42a7b910f1784 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 18:44:04 +0100
Subject: [PATCH 09/40] Benchmarks: fixed bug in JsonLogging

The log should be written to the file in all modes, not only when
logFileAppend is true.
---
 src/Benchmarks/JsonLogging.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index da9c9f43f..c615a3812 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -155,8 +155,7 @@ public:
          logFile << "{" << std::endl;
          logFile << "   \"results\" : [ " << std::endl;
       }
-      else
-         logFile << log.str();
+      logFile << log.str();
       if( logFile.good() ) {
          log.str() = "";
          return true;
-- 
GitLab


From b6855c09c3e06e750599577394489b31ac83d04a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 20:30:40 +0100
Subject: [PATCH 10/40] Benchmarks: refactored time methods

---
 src/Benchmarks/Benchmark.hpp | 57 ++++++++++--------------------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index f7ab5bf68..f5f9bf271 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -168,21 +168,18 @@ time( ResetFunction reset,
    result.time = std::numeric_limits<double>::quiet_NaN();
    result.stddev = std::numeric_limits<double>::quiet_NaN();
    FunctionTimer< Device > functionTimer;
+
+   // run the monitor main loop
+   Solvers::SolverMonitorThread monitor_thread( monitor );
+   if( Logger::verbose <= 1 )
+      // stop the main loop when not verbose
+      monitor.stopMainLoop();
+
    try {
-      if( Logger::verbose > 1 ) {
-         // run the monitor main loop
-         Solvers::SolverMonitorThread monitor_thread( monitor );
-         if( this->reset )
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
-         else
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
-      }
-      else {
-         if( this->reset )
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
-         else
-            std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
-      }
+      if( this->reset )
+         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
+      else
+         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
       this->performedLoops = functionTimer.getPerformedLoops();
    }
    catch ( const std::exception& e ) {
@@ -211,7 +208,7 @@ time( ResetFunction reset,
       ComputeFunction& compute )
 {
    BenchmarkResult result;
-   return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
+   return time< Device >( reset, performer, compute, result );
 }
 
 template< typename Logger >
@@ -223,32 +220,8 @@ time( const String & performer,
       ComputeFunction & compute,
       BenchmarkResult & result )
 {
-   result.time = std::numeric_limits<double>::quiet_NaN();
-   result.stddev = std::numeric_limits<double>::quiet_NaN();
-   FunctionTimer< Device > functionTimer;
-   try {
-      if( Logger::verbose > 1 ) {
-         // run the monitor main loop
-         Solvers::SolverMonitorThread monitor_thread( monitor );
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
-      }
-      else {
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
-      }
-   }
-   catch ( const std::exception& e ) {
-      std::cerr << "Function timer failed due to a C++ exception with description: " << e.what() << std::endl;
-   }
-
-   result.bandwidth = datasetSize / result.time;
-   result.speedup = this->baseTime / result.time;
-   if( this->baseTime == 0.0 )
-      this->baseTime = result.time;
-
-   Logger::writeTableHeader( performer, result.getTableHeader() );
-   Logger::writeTableRow( performer, result.getRowElements() );
-
-   return this->baseTime;
+   auto noReset = [] () {};
+   return time< Device >( noReset, performer, compute, result );
 }
 
 template< typename Logger >
@@ -260,7 +233,7 @@ time( const String & performer,
       ComputeFunction & compute )
 {
    BenchmarkResult result;
-   return time< Device, ComputeFunction >( performer, compute, result );
+   return time< Device >( performer, compute, result );
 }
 
 template< typename Logger >
-- 
GitLab


From d80fe51184c413785a0ffb8a95dc6746039060a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 21:28:07 +0100
Subject: [PATCH 11/40] Benchmarks: simplified and unified interface of
 CustomLogging and JsonLogging

---
 src/Benchmarks/BLAS/dense-mv.h |   2 +-
 src/Benchmarks/BLAS/spmv.h     |   2 +-
 src/Benchmarks/Benchmark.hpp   |  46 +++-------
 src/Benchmarks/Benchmarks.h    |  17 ++--
 src/Benchmarks/CustomLogging.h |  63 +++++++++++---
 src/Benchmarks/JsonLogging.h   | 148 ++++++++++++++++++---------------
 src/Benchmarks/Logging.h       |  27 ++----
 src/Benchmarks/SpMV/spmv.h     |  19 ++---
 8 files changed, 170 insertions(+), 154 deletions(-)

diff --git a/src/Benchmarks/BLAS/dense-mv.h b/src/Benchmarks/BLAS/dense-mv.h
index 1a532fbec..b3e4834a5 100644
--- a/src/Benchmarks/BLAS/dense-mv.h
+++ b/src/Benchmarks/BLAS/dense-mv.h
@@ -76,7 +76,7 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    auto spmvHost = [&]() {
       hostMatrix.vectorProduct( inHostVector, outHostVector );
    };
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
 
 #ifdef HAVE_CUDA
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index d14343153..a6fcc30d7 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -156,7 +156,7 @@ benchmarkSpMV( Benchmark<> & benchmark,
    auto spmvHost = [&]() {
       hostMatrix.vectorProduct( hostVector, hostVector2 );
    };
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
 #ifdef HAVE_CUDA
    auto spmvCuda = [&]() {
diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index f5f9bf271..efa10deb1 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -100,9 +100,7 @@ void
 Benchmark< Logger >::
 setMetadataColumns( const MetadataColumns & metadata )
 {
-   if( Logger::metadataColumns != metadata )
-      Logger::header_changed = true;
-   Logger::metadataColumns = metadata;
+   Logger::setMetadataColumns( metadata );
 }
 
 template< typename Logger >
@@ -110,48 +108,29 @@ void
 Benchmark< Logger >::
 setMetadataElement( const typename MetadataColumns::value_type & element )
 {
-   bool found = false;
-   for( auto & it : Logger::metadataColumns )
-      if( it.first == element.first ) {
-         if( it.second != element.second ) {
-            it.second = element.second;
-            Logger::header_changed = true;
-         }
-         found = true;
-         break;
-      }
-   if( ! found ) {
-      Logger::metadataColumns.push_back( element );
-      Logger::header_changed = true;
-   }
+   Logger::setMetadataElement( element );
 }
 
 template< typename Logger >
 void
 Benchmark< Logger >::
-setOperation( const String & operation,
-              const double datasetSize,
-              const double baseTime )
+setDatasetSize( const double datasetSize,
+                const double baseTime )
 {
-   monitor.setStage( operation.getString() );
-   if( Logger::metadataColumns.size() > 0 && String(Logger::metadataColumns[ 0 ].first) == "operation" ) {
-      Logger::metadataColumns[ 0 ].second = operation;
-   }
-   else {
-      Logger::metadataColumns.insert( Logger::metadataColumns.begin(), {"operation", operation} );
-   }
-   setOperation( datasetSize, baseTime );
-   Logger::header_changed = true;
+   this->datasetSize = datasetSize;
+   this->baseTime = baseTime;
 }
 
 template< typename Logger >
 void
 Benchmark< Logger >::
-setOperation( const double datasetSize,
+setOperation( const String & operation,
+              const double datasetSize,
               const double baseTime )
 {
-   this->datasetSize = datasetSize;
-   this->baseTime = baseTime;
+   monitor.setStage( operation.getString() );
+   Logger::setMetadataElement( {"operation", operation}, 0 );
+   setDatasetSize( datasetSize, baseTime );
 }
 
 template< typename Logger >
@@ -191,8 +170,7 @@ time( ResetFunction reset,
    if( this->baseTime == 0.0 )
       this->baseTime = result.time;
 
-   Logger::writeTableHeader( performer, result.getTableHeader() );
-   Logger::writeTableRow( performer, result.getRowElements() );
+   Logger::logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints() );
 
    return this->baseTime;
 }
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index ec39b88b2..70cbba89b 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -74,11 +74,6 @@ class Benchmark
       using typename Logger::MetadataColumns;
       using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;
 
-      using typename Logger::CommonLogs;
-      using Logger::addCommonLogs;
-      using Logger::addLogsMetadata;
-      using Logger::writeHeader;
-
       Benchmark( int loops = 10,
                bool verbose = true,
                String outputMode = "",
@@ -109,6 +104,11 @@ class Benchmark
       // changing MetadataColumns that were set using the previous method.
       void setMetadataElement( const typename MetadataColumns::value_type & element );
 
+      // Sets the dataset size and base time for the calculations of bandwidth
+      // and speedup in the benchmarks result.
+      void setDatasetSize( const double datasetSize = 0.0, // in GB
+                           const double baseTime = 0.0 );
+
       // Sets current operation -- operations expand the table vertically
       //  - baseTime should be reset to 0.0 for most operations, but sometimes
       //    it is useful to override it
@@ -116,11 +116,8 @@ class Benchmark
       //    easily sorted while converting to HTML.)
       void
       setOperation( const String & operation,
-                  const double datasetSize = 0.0, // in GB
-                  const double baseTime = 0.0 );
-
-      void setOperation( const double datasetSize = 0.0,
-                        const double baseTime = 0.0 );
+                    const double datasetSize = 0.0, // in GB
+                    const double baseTime = 0.0 );
 
       // Times a single ComputeFunction. Subsequent calls implicitly split
       // the current operation into sub-columns identified by "performer",
diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index 5ed8d7e89..64e89f380 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -14,6 +14,7 @@
 #pragma once
 
 #include "Logging.h"
+#include <TNL/Assert.h>
 
 namespace TNL {
 namespace Benchmarks {
@@ -36,15 +37,6 @@ public:
       log << ": title = " << title << std::endl;
    }
 
-   virtual void addCommonLogs( const CommonLogs& logs ) override
-   {
-      for( auto log : logs )
-      {
-         if( verbose )
-            std::cout << log.first << " = " << log.second << std::endl;
-      }
-   };
-
    virtual void
    writeMetadata( const MetadataMap & metadata ) override
    {
@@ -60,9 +52,44 @@ public:
          std::cout << std::endl;
    }
 
+   virtual void setMetadataColumns( const MetadataColumns& elements ) override
+   {
+      // check if a header element changed (i.e. a first item of the pairs)
+      if( metadataColumns.size() != elements.size() )
+         header_changed = true;
+      else
+         for( std::size_t i = 0; i < metadataColumns.size(); i++ )
+            if( metadataColumns[ i ].first != elements[ i ].first ) {
+               header_changed = true;
+               break;
+            }
+      metadataColumns = elements;
+   }
+
    virtual void
+   setMetadataElement( const typename MetadataColumns::value_type & element,
+                       int insertPosition = -1 /* negative values insert from the end */ ) override
+   {
+      bool found = false;
+      for( auto & it : metadataColumns )
+         if( it.first == element.first ) {
+            if( it.second != element.second )
+               it.second = element.second;
+            found = true;
+            break;
+         }
+      if( ! found ) {
+         if( insertPosition < 0 )
+            metadataColumns.insert( metadataColumns.end() + insertPosition + 1, element );
+         else
+            metadataColumns.insert( metadataColumns.begin() + insertPosition, element );
+         header_changed = true;
+      }
+   }
+
+   void
    writeTableHeader( const std::string & spanningElement,
-                     const HeaderElements & subElements ) override
+                     const HeaderElements & subElements )
    {
       if( verbose && header_changed ) {
          for( auto & it : metadataColumns ) {
@@ -93,9 +120,9 @@ public:
       }
    }
 
-   virtual void
+   void
    writeTableRow( const std::string & spanningElement,
-                  const RowElements & subElements ) override
+                  const RowElements & subElements )
    {
       if( verbose ) {
          for( auto & it : metadataColumns ) {
@@ -122,6 +149,18 @@ public:
       }
    }
 
+   virtual void
+   logResult( const std::string& spanningElement,
+              const HeaderElements& headerElements,
+              const RowElements& rowElements,
+              const WidthHints& columnWidthHints ) override
+   {
+      TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
+      TNL_ASSERT_EQ( headerElements.size(), columnWidthHints.size(), "elements must have equal sizes" );
+      writeTableHeader( spanningElement, headerElements );
+      writeTableRow( spanningElement, rowElements );
+   }
+
    virtual void
    writeErrorMessage( const std::string& message ) override
    {
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index c615a3812..bf972e9aa 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -14,6 +14,7 @@
 #pragma once
 
 #include "Logging.h"
+#include <TNL/Assert.h>
 
 namespace TNL {
 namespace Benchmarks {
@@ -28,44 +29,87 @@ public:
    : Logging(verbose), outputMode( outputMode ), logFileAppend( logFileAppend )
    {}
 
-   virtual void addCommonLogs( const CommonLogs& logs ) override
+   virtual void
+   writeTitle( const std::string & title ) override
    {
-      this->commonLogs = logs;
+      if( outputMode == "append" )
+         return;
+
       if( verbose )
-      {
-         std::cout << std::endl << "Benchmark setup:" << std::endl;
-         for( auto lg : logs )
-            std::cout << "   " << lg.first << " = " << lg.second << std::endl;
-         std::cout << std::endl;
+         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
+   }
+
+   virtual void
+   writeMetadata( const MetadataMap & metadata ) override
+   {
+      if( outputMode == "append" )
+         return;
+
+      if( verbose )
+         std::cout << "properties:" << std::endl;
+
+      for( auto & it : metadata ) {
+         if( verbose )
+            std::cout << "   " << it.first << " = " << it.second << std::endl;
       }
-   };
 
-   virtual void resetLogsMetada() override
+      if( verbose )
+         std::cout << std::endl;
+   }
+
+   virtual void setMetadataColumns( const MetadataColumns& elements ) override
    {
-      this->logsMetadata.clear();
-      this->widthHints.clear();
+      // check if a header element changed (i.e. a first item of the pairs)
+      if( metadataColumns.size() != elements.size() )
+         header_changed = true;
+      else
+         for( std::size_t i = 0; i < metadataColumns.size(); i++ )
+            if( metadataColumns[ i ].first != elements[ i ].first ) {
+               header_changed = true;
+               break;
+            }
+      this->metadataColumns = elements;
    }
 
-   virtual void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths ) override
+   virtual void
+   setMetadataElement( const typename MetadataColumns::value_type & element,
+                       int insertPosition = -1 /* negative values insert from the end */ ) override
    {
-      this->logsMetadata.insert( this->logsMetadata.end(), md.begin(), md.end() );
-      this->widthHints.insert( this->widthHints.end(), widths.begin(), widths.end() );
+      bool found = false;
+      for( auto & it : metadataColumns )
+         if( it.first == element.first ) {
+            if( it.second != element.second )
+               it.second = element.second;
+            found = true;
+            break;
+         }
+      if( ! found ) {
+         if( insertPosition < 0 )
+            metadataColumns.insert( metadataColumns.end() + insertPosition + 1, element );
+         else
+            metadataColumns.insert( metadataColumns.begin() + insertPosition, element );
+         header_changed = true;
+      }
    }
 
-   virtual void writeHeader() override
+   void writeHeader( const HeaderElements& headerElements, const WidthHints& widths )
    {
-      TNL_ASSERT_EQ( this->logsMetadata.size(), this->widthHints.size(), "" );
-      if( verbose )
+      TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
+      if( verbose && header_changed )
       {
-         for( std::size_t i = 0; i < this->logsMetadata.size(); i++ )
-            std::cout << std::setw( this->widthHints[ i ] ) << this->logsMetadata[ i ];
+         for( auto & lg : metadataColumns )
+            std::cout << std::setw( 20 ) << lg.first;
+         for( std::size_t i = 0; i < headerElements.size(); i++ )
+            std::cout << std::setw( widths[ i ] ) << headerElements[ i ];
          std::cout << std::endl;
+         header_changed = false;
       }
    }
 
-   void writeRow( const RowElements& rowEls )
+   void writeRow( const HeaderElements& headerElements, const RowElements& rowElements, const WidthHints& widths )
    {
-      TNL_ASSERT_EQ( rowEls.size(), this->logsMetadata.size(), "" );
+      TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
+      TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
       if( this->lineStarted )
          log << "," << std::endl;
 
@@ -73,21 +117,23 @@ public:
 
       // write common logs
       int idx( 0 );
-      for( auto lg : this->commonLogs )
+      for( auto lg : this->metadataColumns )
       {
+         if( verbose )
+            std::cout << std::setw( 20 ) << lg.second;
          if( idx++ > 0 )
             log << "," << std::endl;
          log << "         \"" << lg.first << "\" : \"" << lg.second << "\"";
       }
 
       std::size_t i = 0;
-      for( auto el : rowEls )
+      for( auto el : rowElements )
       {
          if( verbose )
-            std::cout << std::setw( this->widthHints[ i ] ) << el;
+            std::cout << std::setw( widths[ i ] ) << el;
          if( idx++ > 0 )
             log << "," << std::endl;
-         log << "         \"" << this->logsMetadata[ i ] << "\" : \"" << el << "\"";
+         log << "         \"" << headerElements[ i ] << "\" : \"" << el << "\"";
          i++;
       }
       log << std::endl << "      }";
@@ -97,44 +143,13 @@ public:
    }
 
    virtual void
-   writeTitle( const std::string & title ) override
+   logResult( const std::string& spanningElement,
+              const HeaderElements& headerElements,
+              const RowElements& rowElements,
+              const WidthHints& columnWidthHints ) override
    {
-      if( outputMode == "append" )
-         return;
-
-      if( verbose )
-         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
-   }
-
-   virtual void
-   writeMetadata( const MetadataMap & metadata ) override
-   {
-      if( outputMode == "append" )
-         return;
-
-      if( verbose )
-         std::cout << "properties:" << std::endl;
-
-      for( auto & it : metadata ) {
-         if( verbose )
-            std::cout << "   " << it.first << " = " << it.second << std::endl;
-      }
-
-      if( verbose )
-         std::cout << std::endl;
-   }
-
-   virtual void
-   writeTableHeader( const std::string & spanningElement,
-                     const HeaderElements & subElements ) override
-   {
-   }
-
-   virtual void
-   writeTableRow( const std::string & spanningElement,
-                  const RowElements & subElements ) override
-   {
-      writeRow( subElements );
+      writeHeader( headerElements, columnWidthHints );
+      writeRow( headerElements, rowElements, columnWidthHints );
    }
 
    virtual void
@@ -146,6 +161,7 @@ public:
    virtual void
    closeTable() override
    {
+      header_changed = true;
    }
 
    virtual bool save( std::ostream & logFile ) override
@@ -179,14 +195,12 @@ protected:
 
    std::stringstream log;
 
-   // new JSON implementation
-   LogsMetadata logsMetadata;
-   WidthHints widthHints;
-   CommonLogs commonLogs;
+   MetadataColumns metadataColumns;
+   bool header_changed = true;
+
    std::string outputMode;
 
    bool lineStarted = false;
-   bool resultsStarted = false;
    bool logFileAppend = false;
 };
 
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 977c96d2c..d18005184 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -84,9 +84,6 @@ public:
 
    using HeaderElements = std::vector< std::string >;
    using RowElements = LoggingRowElements;
-
-   using CommonLogs = MetadataColumns;
-   using LogsMetadata = HeaderElements;
    using WidthHints = std::vector< int >;
 
    Logging( int verbose = true )
@@ -106,26 +103,20 @@ public:
 
    virtual void writeTitle( const std::string& title ) = 0;
 
-   virtual void addCommonLogs( const CommonLogs& logs ) = 0;
-
-   virtual void resetLogsMetada() {}
-
-   virtual void addLogsMetadata( const LogsMetadata& md, const WidthHints& widths ) {}
+   virtual void writeMetadata( const MetadataMap & metadata ) = 0;
 
-   virtual void writeHeader() {}
+   virtual void setMetadataColumns( const MetadataColumns& elements ) = 0;
 
-   virtual void writeMetadata( const MetadataMap & metadata ) {}
+   virtual void setMetadataElement( const typename MetadataColumns::value_type & element,
+                                    int insertPosition = -1 /* negative values insert from the end */ ) = 0;
 
    virtual void
-   writeTableHeader( const std::string& spanningElement,
-                     const HeaderElements& subElements ) = 0;
+   logResult( const std::string& spanningElement,
+              const HeaderElements& headerElements,
+              const RowElements& rowElements,
+              const WidthHints& columnWidthHints ) = 0;
 
-   virtual void
-   writeTableRow( const std::string& spanningElement,
-                  const RowElements& subElements ) = 0;
-
-   virtual void
-   writeErrorMessage( const std::string& message ) = 0;
+   virtual void writeErrorMessage( const std::string& message ) = 0;
 
    virtual void closeTable() = 0;
 
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 49c811a81..0d508bc4a 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -226,7 +226,7 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
 
    const int elements = hostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
 
    /////
    // Benchmark SpMV on host
@@ -307,7 +307,7 @@ benchmarkSpMV( BenchmarkType& benchmark,
 
    const int elements = hostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
 
    /////
    // Benchmark SpMV on host
@@ -390,7 +390,7 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
 
    const int elements = hostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
 
    /////
    // Benchmark SpMV on host
@@ -493,7 +493,7 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
 
    const int elements = hostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
 
    /////
    // Benchmark SpMV on host
@@ -586,18 +586,19 @@ benchmarkSpmv( BenchmarkType& benchmark,
    MatrixReader< CSRHostMatrix >::readMtx( inputFileName, csrHostMatrix, verboseMR );
    const int elements = csrHostMatrix.getNonzeroElementsCount();
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-   benchmark.setOperation( datasetSize );
+   benchmark.setDatasetSize( datasetSize );
 
    ////
    // Perform benchmark on host with CSR as a reference CPU format
    //
    auto nonzeros = csrHostMatrix.getNonzeroElementsCount();
-   benchmark.addCommonLogs( BenchmarkType::CommonLogs( {
+   benchmark.setMetadataColumns({
       { "matrix name", convertToString( inputFileName ) },
       { "rows", convertToString( csrHostMatrix.getRows() ) },
       { "columns", convertToString( csrHostMatrix.getColumns() ) },
       { "nonzeros", convertToString( nonzeros ) },
-      { "nonzeros per row", convertToString( ( double ) nonzeros / ( double ) csrHostMatrix.getRows() ) } } ) );
+      { "nonzeros per row", convertToString( ( double ) nonzeros / ( double ) csrHostMatrix.getRows() ) },
+   });
 
    HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
 
@@ -611,8 +612,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
    };
 
    SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   benchmark.addLogsMetadata( csrBenchmarkResults.getTableHeader(), csrBenchmarkResults.getColumnWidthHints() );
-   benchmark.writeHeader();
    benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults );
 
 #ifdef HAVE_PETSC
@@ -641,8 +640,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
    };
 
    SpmvBenchmarkResult< Real, Devices::Host, int > petscBenchmarkResults( String( "Petsc" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   //benchmark.addLogsMetadata( petscBenchmarkResults.getTableHeader(), petscBenchmarkResults.getColumnWidthHints() );
-   //benchmark.writeHeader();
    benchmark.time< Devices::Host >( resetPetscVectors, "", petscSpmvCSRHost, petscBenchmarkResults );
 #endif
 
-- 
GitLab


From e7db18c428cf4a8bffdf9fbe0d74b49eedc6324f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 21:38:57 +0100
Subject: [PATCH 12/40] Benchmarks: removed useless inheritance between the
 Benchmark and Logging classes

---
 src/Benchmarks/Benchmark.hpp | 34 +++++++++++++++++++++-------------
 src/Benchmarks/Benchmarks.h  | 10 +++++-----
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmark.hpp
index efa10deb1..899792e8c 100644
--- a/src/Benchmarks/Benchmark.hpp
+++ b/src/Benchmarks/Benchmark.hpp
@@ -29,7 +29,7 @@ Benchmark( int loops,
            bool verbose,
            String outputMode,
            bool logFileAppend )
-: Logger(verbose, outputMode, logFileAppend), loops(loops)
+: logger(verbose, outputMode, logFileAppend), loops(loops)
 {}
 
 template< typename Logger >
@@ -52,7 +52,7 @@ setup( const Config::ParameterContainer& parameters )
    this->reset = parameters.getParameter< bool >( "reset" );
    this->minTime = parameters.getParameter< double >( "min-time" );
    const int verbose = parameters.getParameter< int >( "verbose" );
-   Logger::setVerbose( verbose );
+   logger.setVerbose( verbose );
 }
 
 template< typename Logger >
@@ -76,8 +76,8 @@ void
 Benchmark< Logger >::
 newBenchmark( const String & title )
 {
-   Logger::closeTable();
-   Logger::writeTitle( title );
+   logger.closeTable();
+   logger.writeTitle( title );
 }
 
 template< typename Logger >
@@ -86,13 +86,13 @@ Benchmark< Logger >::
 newBenchmark( const String & title,
                MetadataMap metadata )
 {
-   Logger::closeTable();
-   Logger::writeTitle( title );
+   logger.closeTable();
+   logger.writeTitle( title );
    // add loops and reset flag to metadata
    metadata["loops"] = convertToString(loops);
    metadata["reset"] = convertToString( reset );
    metadata["minimal test time"] = convertToString( minTime );
-   Logger::writeMetadata( metadata );
+   logger.writeMetadata( metadata );
 }
 
 template< typename Logger >
@@ -100,7 +100,7 @@ void
 Benchmark< Logger >::
 setMetadataColumns( const MetadataColumns & metadata )
 {
-   Logger::setMetadataColumns( metadata );
+   logger.setMetadataColumns( metadata );
 }
 
 template< typename Logger >
@@ -108,7 +108,7 @@ void
 Benchmark< Logger >::
 setMetadataElement( const typename MetadataColumns::value_type & element )
 {
-   Logger::setMetadataElement( element );
+   logger.setMetadataElement( element );
 }
 
 template< typename Logger >
@@ -129,7 +129,7 @@ setOperation( const String & operation,
               const double baseTime )
 {
    monitor.setStage( operation.getString() );
-   Logger::setMetadataElement( {"operation", operation}, 0 );
+   logger.setMetadataElement( {"operation", operation}, 0 );
    setDatasetSize( datasetSize, baseTime );
 }
 
@@ -150,7 +150,7 @@ time( ResetFunction reset,
 
    // run the monitor main loop
    Solvers::SolverMonitorThread monitor_thread( monitor );
-   if( Logger::verbose <= 1 )
+   if( logger.getVerbose() <= 1 )
       // stop the main loop when not verbose
       monitor.stopMainLoop();
 
@@ -170,7 +170,7 @@ time( ResetFunction reset,
    if( this->baseTime == 0.0 )
       this->baseTime = result.time;
 
-   Logger::logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints() );
+   logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints() );
 
    return this->baseTime;
 }
@@ -219,10 +219,18 @@ void
 Benchmark< Logger >::
 addErrorMessage( const char* msg )
 {
-   Logger::writeErrorMessage( msg );
+   logger.writeErrorMessage( msg );
    std::cerr << msg << std::endl;
 }
 
+template< typename Logger >
+bool
+Benchmark< Logger >::
+save( std::ostream& logFile )
+{
+   return logger.save( logFile );
+}
+
 template< typename Logger >
 auto
 Benchmark< Logger >::
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 70cbba89b..cc36850d6 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -66,12 +66,11 @@ struct BenchmarkResult
 
 template< typename Logger = CustomLogging >
 class Benchmark
-: protected Logger
 {
    public:
-      using typename Logger::MetadataElement;
-      using typename Logger::MetadataMap;
-      using typename Logger::MetadataColumns;
+      using MetadataElement = typename Logger::MetadataElement;
+      using MetadataMap = typename Logger::MetadataMap;
+      using MetadataColumns = typename Logger::MetadataColumns;
       using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;
 
       Benchmark( int loops = 10,
@@ -161,7 +160,7 @@ class Benchmark
       // "time" method could not be called (e.g. due to failed allocation).
       void addErrorMessage( const char* msg );
 
-      using Logger::save;
+      bool save( std::ostream& logFile );
 
       SolverMonitorType& getMonitor();
 
@@ -170,6 +169,7 @@ class Benchmark
       bool isResetingOn() const;
 
    protected:
+      Logger logger;
 
       int loops = 1, performedLoops = 0;
 
-- 
GitLab


From a8c4fe27da1733e3a0464fe99fcfc7f3269e459c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Thu, 4 Nov 2021 21:29:08 +0100
Subject: [PATCH 13/40] SpMV benchmark: --input-file should be a required
 parameter, other minor fixes

---
 src/Benchmarks/SpMV/spmv.h               | 2 +-
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 0d508bc4a..70ed76652 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -821,6 +821,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
 #endif
 }
 
-      } // namespace SpMVLegacy
+      } // namespace SpMV
    } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 48349ff73..0f3f29b1d 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -66,7 +66,7 @@ std::string getCurrDateTime()
    char buffer[ 80 ];
    time( &rawtime );
    timeinfo = localtime( &rawtime );
-   strftime( buffer, sizeof( buffer ), "%d-%m-%Y--%H:%M:%S", timeinfo );
+   strftime( buffer, sizeof( buffer ), "%Y-%m-%d--%H:%M:%S", timeinfo );
    std::string curr_date_time( buffer );
    return curr_date_time;
 }
@@ -75,7 +75,7 @@ void
 setupConfig( Config::ConfigDescription & config )
 {
    config.addDelimiter( "Benchmark settings:" );
-   config.addEntry< String >( "input-file", "Input file name.", "" );
+   config.addRequiredEntry< String >( "input-file", "Input file name." );
    config.addEntry< bool >( "with-symmetric-matrices", "Perform benchmark even for symmetric matrix formats.", true );
    config.addEntry< bool >( "with-legacy-matrices", "Perform benchmark even for legacy TNL matrix formats.", true );
    config.addEntry< bool >( "with-all-cpu-tests", "All matrix formats are tested on both CPU and GPU. ", false );
-- 
GitLab


From 973c4e293f02bffb6f8608a1c9ed3324a7b582e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 14:45:27 +0100
Subject: [PATCH 14/40] Renamed Benchmark.hpp to Benchmarks.hpp to match
 Benchmarks.h

---
 src/Benchmarks/Benchmarks.h                      | 3 +--
 src/Benchmarks/{Benchmark.hpp => Benchmarks.hpp} | 0
 src/Benchmarks/CMakeLists.txt                    | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)
 rename src/Benchmarks/{Benchmark.hpp => Benchmarks.hpp} (100%)

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index cc36850d6..0e29b0637 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -31,7 +31,6 @@ namespace Benchmarks {
 
 const double oneGB = 1024.0 * 1024.0 * 1024.0;
 
-
 struct BenchmarkResult
 {
    using HeaderElements = typename Logging::HeaderElements;
@@ -240,4 +239,4 @@ inline typename Logging::MetadataMap getHardwareMetadata()
 } // namespace Benchmarks
 } // namespace TNL
 
-#include "Benchmark.hpp"
+#include "Benchmarks.hpp"
diff --git a/src/Benchmarks/Benchmark.hpp b/src/Benchmarks/Benchmarks.hpp
similarity index 100%
rename from src/Benchmarks/Benchmark.hpp
rename to src/Benchmarks/Benchmarks.hpp
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 3ea1dbd2b..5d0cc9212 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -10,7 +10,7 @@ add_subdirectory( Traversers )
 
 set( headers
          Benchmarks.h
-         Benchmark.hpp
+         Benchmarks.hpp
          FunctionTimer.h
          Logging.h
          CustomLogging.h
-- 
GitLab


From 0411330cbfec6a58aeab5777fb0130baf598a917 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 14:54:02 +0100
Subject: [PATCH 15/40] Benchmarks: removed useless overload of the
 timeFunction

---
 src/Benchmarks/Benchmarks.hpp  |  6 ++++--
 src/Benchmarks/FunctionTimer.h | 14 --------------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index 899792e8c..ba7270278 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -157,8 +157,10 @@ time( ResetFunction reset,
    try {
       if( this->reset )
          std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
-      else
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, loops, minTime, monitor );
+      else {
+         auto noReset = [] () {};
+         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
+      }
       this->performedLoops = functionTimer.getPerformedLoops();
    }
    catch ( const std::exception& e ) {
diff --git a/src/Benchmarks/FunctionTimer.h b/src/Benchmarks/FunctionTimer.h
index bb7617c16..ee60db984 100644
--- a/src/Benchmarks/FunctionTimer.h
+++ b/src/Benchmarks/FunctionTimer.h
@@ -90,20 +90,6 @@ public:
       }
    }
 
-   // returns a pair of (mean, stddev) where mean is the arithmetic mean of the
-   // computation times and stddev is the sample standard deviation
-   template< typename ComputeFunction,
-             typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
-   std::pair< double, double >
-   timeFunction( ComputeFunction compute,
-                 int maxLoops,
-                 const double& minTime,
-                 Monitor && monitor = Monitor() )
-   {
-      auto noReset = [] () {};
-      return timeFunction( compute, noReset, maxLoops, minTime, monitor );
-   }
-
    int getPerformedLoops() const
    {
       return this->loops;
-- 
GitLab


From c0af5a3923aa14c6b98180f082dabc09926ef0f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 15:11:57 +0100
Subject: [PATCH 16/40] Benchmarks: exceptions due to timeFunction are added to
 the log

---
 src/Benchmarks/Benchmarks.h    |  2 +-
 src/Benchmarks/Benchmarks.hpp  | 12 +++++++-----
 src/Benchmarks/CustomLogging.h | 22 ++++++++++++++-------
 src/Benchmarks/JsonLogging.h   | 36 ++++++++++++++++++++++++++++++----
 src/Benchmarks/Logging.h       |  3 ++-
 5 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 0e29b0637..58017abac 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -157,7 +157,7 @@ class Benchmark
 
       // Adds an error message to the log. Should be called in places where the
       // "time" method could not be called (e.g. due to failed allocation).
-      void addErrorMessage( const char* msg );
+      void addErrorMessage( const std::string& message );
 
       bool save( std::ostream& logFile );
 
diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index ba7270278..3e7f41fa9 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -154,6 +154,7 @@ time( ResetFunction reset,
       // stop the main loop when not verbose
       monitor.stopMainLoop();
 
+   std::string errorMessage;
    try {
       if( this->reset )
          std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
@@ -164,7 +165,8 @@ time( ResetFunction reset,
       this->performedLoops = functionTimer.getPerformedLoops();
    }
    catch ( const std::exception& e ) {
-      std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl;
+      errorMessage = "timeFunction failed due to a C++ exception with description: " + std::string(e.what());
+      std::cerr << errorMessage << std::endl;
    }
 
    result.bandwidth = datasetSize / result.time;
@@ -172,7 +174,7 @@ time( ResetFunction reset,
    if( this->baseTime == 0.0 )
       this->baseTime = result.time;
 
-   logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints() );
+   logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints(), errorMessage );
 
    return this->baseTime;
 }
@@ -219,10 +221,10 @@ time( const String & performer,
 template< typename Logger >
 void
 Benchmark< Logger >::
-addErrorMessage( const char* msg )
+addErrorMessage( const std::string& message )
 {
-   logger.writeErrorMessage( msg );
-   std::cerr << msg << std::endl;
+   logger.writeErrorMessage( message );
+   std::cerr << message << std::endl;
 }
 
 template< typename Logger >
diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index 64e89f380..384300b76 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -122,7 +122,8 @@ public:
 
    void
    writeTableRow( const std::string & spanningElement,
-                  const RowElements & subElements )
+                  const RowElements & subElements,
+                  const std::string & errorMessage )
    {
       if( verbose ) {
          for( auto & it : metadataColumns ) {
@@ -142,10 +143,16 @@ public:
          log << it.second << std::endl;
       }
 
-      // benchmark data are indented
-      const std::string indent = "    ";
-      for( auto & it : subElements ) {
-         log << indent << it << std::endl;
+      if( errorMessage.empty() ) {
+         // benchmark data are indented
+         const std::string indent = "    ";
+         for( auto & it : subElements ) {
+            log << indent << it << std::endl;
+         }
+      }
+      else {
+         // write the message
+         log << errorMessage << std::endl;
       }
    }
 
@@ -153,12 +160,13 @@ public:
    logResult( const std::string& spanningElement,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
-              const WidthHints& columnWidthHints ) override
+              const WidthHints& columnWidthHints,
+              const std::string& errorMessage = "" ) override
    {
       TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
       TNL_ASSERT_EQ( headerElements.size(), columnWidthHints.size(), "elements must have equal sizes" );
       writeTableHeader( spanningElement, headerElements );
-      writeTableRow( spanningElement, rowElements );
+      writeTableRow( spanningElement, rowElements, errorMessage );
    }
 
    virtual void
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index bf972e9aa..423e10072 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -106,7 +106,10 @@ public:
       }
    }
 
-   void writeRow( const HeaderElements& headerElements, const RowElements& rowElements, const WidthHints& widths )
+   void writeRow( const HeaderElements& headerElements,
+                  const RowElements& rowElements,
+                  const WidthHints& widths,
+                  const std::string& errorMessage )
    {
       TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
       TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
@@ -136,6 +139,11 @@ public:
          log << "         \"" << headerElements[ i ] << "\" : \"" << el << "\"";
          i++;
       }
+      if( ! errorMessage.empty() ) {
+         if( idx++ > 0 )
+            log << "," << std::endl;
+         log << "         \"error\" : \"" << errorMessage << "\"";
+      }
       log << std::endl << "      }";
       this->lineStarted = true;
       if( verbose )
@@ -146,16 +154,36 @@ public:
    logResult( const std::string& spanningElement,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
-              const WidthHints& columnWidthHints ) override
+              const WidthHints& columnWidthHints,
+              const std::string& errorMessage = "" ) override
    {
       writeHeader( headerElements, columnWidthHints );
-      writeRow( headerElements, rowElements, columnWidthHints );
+      writeRow( headerElements, rowElements, columnWidthHints, errorMessage );
    }
 
    virtual void
    writeErrorMessage( const std::string& message ) override
    {
-      log << "\"error\" : \"" << message << "\"" << std::endl;
+      if( this->lineStarted )
+         log << "," << std::endl;
+
+      log << "      {" << std::endl;
+
+      // write common logs
+      int idx( 0 );
+      for( auto lg : this->metadataColumns )
+      {
+         if( idx++ > 0 )
+            log << "," << std::endl;
+         log << "         \"" << lg.first << "\" : \"" << lg.second << "\"";
+      }
+
+      if( idx++ > 0 )
+         log << "," << std::endl;
+      log << "         \"error\" : \"" << message << "\"";
+
+      log << std::endl << "      }";
+      this->lineStarted = true;
    }
 
    virtual void
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index d18005184..99edf022e 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -114,7 +114,8 @@ public:
    logResult( const std::string& spanningElement,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
-              const WidthHints& columnWidthHints ) = 0;
+              const WidthHints& columnWidthHints,
+              const std::string& errorMessage = "" ) = 0;
 
    virtual void writeErrorMessage( const std::string& message ) = 0;
 
-- 
GitLab


From be5054f86c91564cbc1077c047bd9048c13374d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 18:47:56 +0100
Subject: [PATCH 17/40] Benchmarks: performed loops are returned via the
 BenchmarkResult struct, base-time via the getBaseTime method

---
 src/Benchmarks/BLAS/array-operations.h |  7 +---
 src/Benchmarks/Benchmarks.h            | 53 +++++++++++---------------
 src/Benchmarks/Benchmarks.hpp          | 29 +++++++-------
 src/Benchmarks/FunctionTimer.h         | 30 ++++++---------
 4 files changed, 50 insertions(+), 69 deletions(-)

diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h
index 38a58c431..1bffd770a 100644
--- a/src/Benchmarks/BLAS/array-operations.h
+++ b/src/Benchmarks/BLAS/array-operations.h
@@ -116,10 +116,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark,
       hostArray = hostArray2;
    };
    benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
-   // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will
-   // complain when compiling without CUDA
-   const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost );
-   (void)copyBasetime;  // ignore unused variable
+   benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost );
 #ifdef HAVE_CUDA
    auto copyAssignCudaCuda = [&]() {
       deviceArray = deviceArray2;
@@ -135,7 +132,7 @@ benchmarkArrayOperations( Benchmark<> & benchmark,
    auto copyAssignCudaHost = [&]() {
       hostArray = deviceArray;
    };
-   benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime );
+   benchmark.setOperation( "copy (operator=)", datasetSize, benchmark.getBaseTime() );
    benchmark.time< Devices::Cuda >( reset1, "CPU->GPU", copyAssignHostCuda );
    benchmark.time< Devices::Cuda >( reset1, "GPU->CPU", copyAssignCudaHost );
 #endif
diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 58017abac..898ba294e 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -36,6 +36,7 @@ struct BenchmarkResult
    using HeaderElements = typename Logging::HeaderElements;
    using RowElements = typename Logging::RowElements;
 
+   int loops = 0;
    double time = std::numeric_limits<double>::quiet_NaN();
    double stddev = std::numeric_limits<double>::quiet_NaN();
    double bandwidth = std::numeric_limits<double>::quiet_NaN();
@@ -120,40 +121,32 @@ class Benchmark
       // Times a single ComputeFunction. Subsequent calls implicitly split
       // the current operation into sub-columns identified by "performer",
       // which are further split into "bandwidth", "time" and "speedup" columns.
-      // TODO: allow custom columns bound to lambda functions (e.g. for Gflops calculation)
-      // Also terminates the recursion of the following variadic template.
       template< typename Device,
-               typename ResetFunction,
-               typename ComputeFunction >
-      double time( ResetFunction reset,
-                  const String & performer,
-                  ComputeFunction & compute,
-                  BenchmarkResult & result );
+                typename ResetFunction,
+                typename ComputeFunction >
+      void time( ResetFunction reset,
+                 const String & performer,
+                 ComputeFunction & compute,
+                 BenchmarkResult & result );
 
       template< typename Device,
-               typename ResetFunction,
-               typename ComputeFunction >
-      inline double time( ResetFunction reset,
-                        const String & performer,
-                        ComputeFunction & compute );
-      /*{
-         BenchmarkResult result;
-         return time< Device, ResetFunction, ComputeFunction >( reset, performer, compute, result );
-      }*/
-
-      /****
-       * The same methods as above but without reset function
-       */
+                typename ResetFunction,
+                typename ComputeFunction >
+      BenchmarkResult time( ResetFunction reset,
+                            const String & performer,
+                            ComputeFunction & compute );
+
+      // The same methods as above but without the reset function
       template< typename Device,
-               typename ComputeFunction >
-      double time( const String & performer,
-                  ComputeFunction & compute,
-                  BenchmarkResult & result );
+                typename ComputeFunction >
+      void time( const String & performer,
+                 ComputeFunction & compute,
+                 BenchmarkResult & result );
 
       template< typename Device,
-               typename ComputeFunction >
-      inline double time( const String & performer,
-                        ComputeFunction & compute );
+                typename ComputeFunction >
+      BenchmarkResult time( const String & performer,
+                            ComputeFunction & compute );
 
       // Adds an error message to the log. Should be called in places where the
       // "time" method could not be called (e.g. due to failed allocation).
@@ -163,14 +156,14 @@ class Benchmark
 
       SolverMonitorType& getMonitor();
 
-      int getPerformedLoops() const;
+      double getBaseTime() const;
 
       bool isResetingOn() const;
 
    protected:
       Logger logger;
 
-      int loops = 1, performedLoops = 0;
+      int loops = 1;
 
       double minTime = 0.0;
 
diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index 3e7f41fa9..4a085ca2f 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -137,7 +137,7 @@ template< typename Logger >
    template< typename Device,
              typename ResetFunction,
              typename ComputeFunction >
-double
+void
 Benchmark< Logger >::
 time( ResetFunction reset,
       const String & performer,
@@ -157,12 +157,11 @@ time( ResetFunction reset,
    std::string errorMessage;
    try {
       if( this->reset )
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
+         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
       else {
          auto noReset = [] () {};
-         std::tie( result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
+         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
       }
-      this->performedLoops = functionTimer.getPerformedLoops();
    }
    catch ( const std::exception& e ) {
       errorMessage = "timeFunction failed due to a C++ exception with description: " + std::string(e.what());
@@ -175,47 +174,47 @@ time( ResetFunction reset,
       this->baseTime = result.time;
 
    logger.logResult( performer, result.getTableHeader(), result.getRowElements(), result.getColumnWidthHints(), errorMessage );
-
-   return this->baseTime;
 }
 
 template< typename Logger >
    template< typename Device,
              typename ResetFunction,
              typename ComputeFunction >
-inline double
+BenchmarkResult
 Benchmark< Logger >::
 time( ResetFunction reset,
       const String& performer,
       ComputeFunction& compute )
 {
    BenchmarkResult result;
-   return time< Device >( reset, performer, compute, result );
+   time< Device >( reset, performer, compute, result );
+   return result;
 }
 
 template< typename Logger >
    template< typename Device,
              typename ComputeFunction >
-double
+void
 Benchmark< Logger >::
 time( const String & performer,
       ComputeFunction & compute,
       BenchmarkResult & result )
 {
    auto noReset = [] () {};
-   return time< Device >( noReset, performer, compute, result );
+   time< Device >( noReset, performer, compute, result );
 }
 
 template< typename Logger >
    template< typename Device,
              typename ComputeFunction >
-inline double
+BenchmarkResult
 Benchmark< Logger >::
 time( const String & performer,
       ComputeFunction & compute )
 {
    BenchmarkResult result;
-   return time< Device >( performer, compute, result );
+   time< Device >( performer, compute, result );
+   return result;
 }
 
 template< typename Logger >
@@ -244,11 +243,11 @@ getMonitor() -> SolverMonitorType&
 }
 
 template< typename Logger >
-int
+double
 Benchmark< Logger >::
-getPerformedLoops() const
+getBaseTime() const
 {
-   return this->performedLoops;
+   return baseTime;
 }
 
 template< typename Logger >
diff --git a/src/Benchmarks/FunctionTimer.h b/src/Benchmarks/FunctionTimer.h
index ee60db984..010b7ed00 100644
--- a/src/Benchmarks/FunctionTimer.h
+++ b/src/Benchmarks/FunctionTimer.h
@@ -13,7 +13,7 @@
 
 #pragma once
 
-#include <type_traits>
+#include <tuple>
 
 #include <TNL/Timer.h>
 #include <TNL/Devices/Cuda.h>
@@ -27,12 +27,13 @@ template< typename Device >
 class FunctionTimer
 {
 public:
-   // returns a pair of (mean, stddev) where mean is the arithmetic mean of the
+   // returns a tuple of (loops, mean, stddev) where loops is the number of
+   // performed loops (i.e. timing samples), mean is the arithmetic mean of the
    // computation times and stddev is the sample standard deviation
    template< typename ComputeFunction,
              typename ResetFunction,
              typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
-   std::pair< double, double >
+   std::tuple< int, double, double >
    timeFunction( ComputeFunction compute,
                  ResetFunction reset,
                  int maxLoops,
@@ -52,6 +53,7 @@ public:
       Containers::Vector< double > results( maxLoops );
       results.setValue( 0.0 );
 
+      int loops;
       for( loops = 0;
            loops < maxLoops || sum( results ) < minTime;
            loops++ )
@@ -80,23 +82,13 @@ public:
       }
 
       const double mean = sum( results ) / (double) loops;
-      if( loops > 1 ) {
-         const double stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
-         return std::make_pair( mean, stddev );
-      }
-      else {
-         const double stddev = std::numeric_limits<double>::quiet_NaN();
-         return std::make_pair( mean, stddev );
-      }
-   }
-
-   int getPerformedLoops() const
-   {
-      return this->loops;
+      double stddev;
+      if( loops > 1 )
+         stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
+      else
+         stddev = std::numeric_limits<double>::quiet_NaN();
+      return std::make_tuple( loops, mean, stddev );
    }
-
-protected:
-   int loops;
 };
 
 } // namespace Benchmarks
-- 
GitLab


From 824f1b82d70c21a85d1bbb3705fe0f18155c8fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 18:49:52 +0100
Subject: [PATCH 18/40] Fixed benchmarks of grid traversers

---
 .../Traversers/BenchmarkTraverserUserData.h   |   4 +-
 src/Benchmarks/Traversers/CMakeLists.txt      |  13 +-
 .../Traversers/GridTraverserBenchmarkHelper.h |   5 -
 .../GridTraverserBenchmarkHelper_1D.h         |  21 +--
 .../GridTraverserBenchmarkHelper_2D.h         |  16 +-
 .../GridTraverserBenchmarkHelper_3D.h         |  22 +--
 .../Traversers/GridTraversersBenchmark.h      |  18 ---
 .../Traversers/GridTraversersBenchmark_1D.h   |  46 +++---
 .../Traversers/GridTraversersBenchmark_2D.h   |  57 ++++---
 .../Traversers/GridTraversersBenchmark_3D.h   |  42 ++---
 src/Benchmarks/Traversers/cuda-kernels.h      |  42 ++---
 .../Traversers/tnl-benchmark-traversers.h     | 145 +++++++++---------
 12 files changed, 215 insertions(+), 216 deletions(-)

diff --git a/src/Benchmarks/Traversers/BenchmarkTraverserUserData.h b/src/Benchmarks/Traversers/BenchmarkTraverserUserData.h
index 2ae00ec69..35b08d993 100644
--- a/src/Benchmarks/Traversers/BenchmarkTraverserUserData.h
+++ b/src/Benchmarks/Traversers/BenchmarkTraverserUserData.h
@@ -12,6 +12,8 @@
 
 #pragma once
 
+#include <TNL/Pointers/SharedPointer.h>
+
 namespace TNL {
    namespace Benchmarks {
       namespace Traversers {
@@ -25,7 +27,7 @@ class BenchmarkTraverserUserData
       using RealType = typename MeshType::RealType;
       using DeviceType = typename MeshType::DeviceType;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
-      
+
       BenchmarkTraverserUserData( MeshFunctionPointer& f )
          : u( &f.template modifyData< DeviceType >() ), data( f->getData().getData() ){}
 
diff --git a/src/Benchmarks/Traversers/CMakeLists.txt b/src/Benchmarks/Traversers/CMakeLists.txt
index 5932d2606..6b7712d2b 100644
--- a/src/Benchmarks/Traversers/CMakeLists.txt
+++ b/src/Benchmarks/Traversers/CMakeLists.txt
@@ -1,10 +1,9 @@
 # TODO: Split the benchmark into several files for faster build
 
-#if( BUILD_CUDA )
-#    CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu )
-#else()
-#    ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp )
-#endif()
-
-#install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin )
+if( BUILD_CUDA )
+    CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu )
+else()
+    ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp )
+endif()
 
+install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin )
diff --git a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h
index 6da7ec09b..13022458a 100644
--- a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h
+++ b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h
@@ -12,10 +12,6 @@
 
 #pragma once
 
-#include "AddOneEntitiesProcessor.h"
-#include "BenchmarkTraverserUserData.h"
-#include "SimpleCell.h"
-
 namespace TNL {
    namespace Benchmarks {
       namespace Traversers {
@@ -23,7 +19,6 @@ namespace TNL {
 template< typename Grid >
 class GridTraverserBenchmarkHelper{};
 
-
       } // namespace Traversers
    } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h
index e460a8bca..b2fc9bf9c 100644
--- a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h
+++ b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h
@@ -12,12 +12,15 @@
 
 #pragma once
 
+#include <TNL/Functions/MeshFunctionView.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Meshes/Traverser.h>
+
 #include "GridTraverserBenchmarkHelper.h"
 #include "AddOneEntitiesProcessor.h"
 #include "BenchmarkTraverserUserData.h"
 #include "SimpleCell.h"
 
-
 namespace TNL {
    namespace Benchmarks {
       namespace Traversers {
@@ -40,10 +43,10 @@ _GridTraverser1D(
    typedef Index IndexType;
    typedef Meshes::Grid< 1, Real, Devices::Cuda, Index > GridType;
    //typename GridType::CoordinatesType coordinates;
- 
+
    GridEntity entity( *grid );
-   entity.getCoordinates().x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   //coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   entity.getCoordinates().x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   //coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( entity.getCoordinates() <= end )
    {
       entity.refresh();
@@ -69,7 +72,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Host, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -108,7 +111,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -122,7 +125,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index
       {
 #ifdef HAVE_CUDA
             dim3 blockSize( 256 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -131,7 +134,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index
             for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
             {
                dim3 gridSize;
-               Devices::Cuda::setupGrid(
+               Cuda::setupGrid(
                   blocksCount,
                   gridsCount,
                   gridIdx,
@@ -148,7 +151,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index
 #endif
       }
 };
-         
+
       } // namespace Traversers
    } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h
index eca6c7fee..a3d1a0436 100644
--- a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h
+++ b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h
@@ -12,6 +12,10 @@
 
 #pragma once
 
+#include <TNL/Functions/MeshFunctionView.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Meshes/Traverser.h>
+
 #include "GridTraverserBenchmarkHelper.h"
 #include "AddOneEntitiesProcessor.h"
 #include "BenchmarkTraverserUserData.h"
@@ -40,8 +44,8 @@ _GridTraverser2D(
    typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType;
 
    GridEntity entity( *grid );
-   entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
    if( entity.getCoordinates() <= end )
    {
       entity.refresh();
@@ -62,7 +66,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Host, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -104,7 +108,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -118,7 +122,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index
       {
 #ifdef HAVE_CUDA
             dim3 blockSize( 16, 16 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -129,7 +133,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index
                for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                {
                   dim3 gridSize;
-                  Devices::Cuda::setupGrid(
+                  Cuda::setupGrid(
                      blocksCount,
                      gridsCount,
                      gridIdx,
diff --git a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_3D.h b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_3D.h
index 4a5da6fd4..939d8a681 100644
--- a/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_3D.h
+++ b/src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_3D.h
@@ -12,6 +12,10 @@
 
 #pragma once
 
+#include <TNL/Functions/MeshFunctionView.h>
+#include <TNL/Pointers/SharedPointer.h>
+#include <TNL/Meshes/Traverser.h>
+
 #include "GridTraverserBenchmarkHelper.h"
 #include "AddOneEntitiesProcessor.h"
 #include "BenchmarkTraverserUserData.h"
@@ -38,12 +42,12 @@ _GridTraverser3D(
    typedef Real RealType;
    typedef Index IndexType;
    typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType;
- 
+
    GridEntity entity( *grid );
-   entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   entity.getCoordinates().z() = begin.z() + ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
-   
+   entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   entity.getCoordinates().z() = begin.z() + ( gridIdx.z * Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
+
    if( entity.getCoordinates() <= end )
    {
       entity.refresh();
@@ -64,7 +68,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 3, Real, Devices::Host, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -107,7 +111,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 3, Real, Devices::Cuda, Index
       using RealType = typename GridType::RealType;
       using IndexType = typename GridType::IndexType;
       using CoordinatesType = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -121,7 +125,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 3, Real, Devices::Cuda, Index
       {
 #ifdef HAVE_CUDA
             dim3 blockSize( 32, 4, 2 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -134,7 +138,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 3, Real, Devices::Cuda, Index
                   for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                   {
                      dim3 gridSize;
-                     Devices::Cuda::setupGrid(
+                     Cuda::setupGrid(
                         blocksCount,
                         gridsCount,
                         gridIdx,
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
index 01590f122..5ea87b6dc 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
@@ -12,28 +12,10 @@
 
 #pragma once
 
-#include <TNL/Algorithms/ParallelFor.h>
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Meshes/Grid.h>
-#include <TNL/Meshes/GridEntityConfig.h>
-#include <TNL/Meshes/Traverser.h>
-#include <TNL/Functions/MeshFunction.h>
-#include <TNL/Pointers/SharedPointer.h>
-
-#include "AddOneEntitiesProcessor.h"
-#include "AddTwoEntitiesProcessor.h"
-#include "GridTraverserBenchmarkHelper.h"
-#include "BenchmarkTraverserUserData.h"
-#include "cuda-kernels.h"
-
 namespace TNL {
    namespace Benchmarks {
       namespace Traversers {
 
-
-
 template< int Dimension,
           typename Device,
           typename Real,
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
index 9820af392..6e8e0c37f 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h
@@ -13,23 +13,27 @@
 #pragma once
 
 #include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/contains.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/MeshFunctionView.h>
 #include <TNL/Pointers/SharedPointer.h>
 #include "cuda-kernels.h"
+#include "AddOneEntitiesProcessor.h"
+#include "AddTwoEntitiesProcessor.h"
+#include "BenchmarkTraverserUserData.h"
 #include "GridTraversersBenchmark.h"
+#include "GridTraverserBenchmarkHelper.h"
 #include "SimpleCell.h"
 
 namespace TNL {
    namespace Benchmarks {
       namespace Traversers {
 
-
 template< typename Device,
           typename Real,
           typename Index >
@@ -41,7 +45,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
       using GridType = Meshes::Grid< 1, Real, Device, Index >;
       using GridPointer = Pointers::SharedPointer< GridType >;
       using Coordinates = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< 1, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -51,11 +55,13 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
       using AddTwoEntitiesProcessorType = AddTwoEntitiesProcessor< UserDataType >;
 
       GridTraversersBenchmark( Index size )
-      :size( size ), v( size ), grid( size ), u( grid ),
+      :size( size ),
+       v( size ),
+       grid( size ),
        userData( this->u )
       {
          v_data = v.getData();
-         u->getData().bind( v );
+         u->bind( grid, v );
       }
 
       void reset()
@@ -74,7 +80,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 256 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -83,7 +89,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
             for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
             {
                dim3 gridSize;
-               Devices::Cuda::setupGrid(
+               Cuda::setupGrid(
                   blocksCount,
                   gridsCount,
                   gridIdx,
@@ -100,7 +106,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
          {
             data[ i ] += (Real) 1.0;
          };
-         ParallelFor< Device, AsynchronousMode >::exec( ( Index ) 0, size, f, v.getData() );
+         Algorithms::ParallelFor< Device, Algorithms::AsynchronousMode >::exec( ( Index ) 0, size, f, v.getData() );
       }
 
       void addOneUsingSimpleCell()
@@ -113,7 +119,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
             entity.refresh();
             data[ entity.getIndex() ] += (Real) 1.0;
          };
-         ParallelFor< Device, AsynchronousMode >::exec( ( Index ) 0, size, f, v.getData() );*/
+         Algorithms::ParallelFor< Device, Algorithms::AsynchronousMode >::exec( ( Index ) 0, size, f, v.getData() );*/
          GridTraverserBenchmarkHelper< GridType >::simpleCellTest(
             grid,
             userData,
@@ -132,15 +138,15 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
             _u->getData().getData()[ entity.getIndex() ] += (Real) 1.0;
             // ( *_u )( entity ) += (Real) 1.0;
          };
-         ParallelFor< Device, AsynchronousMode >::exec( ( Index ) 0, size, f );
+         Algorithms::ParallelFor< Device, Algorithms::AsynchronousMode >::exec( ( Index ) 0, size, f );
       }
 
       void addOneUsingTraverser()
       {
          using CoordinatesType = typename GridType::CoordinatesType;
-         traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processAllEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
-         
+
          /*GridTraverserBenchmarkHelper< GridType >::noBCTraverserTest(
             grid,
             userData,
@@ -151,8 +157,8 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
       {
          std::cout << loops << " -> " << v << std::endl;
          if( reseting )
-            return v.containsOnlyValue( 1.0 );
-         return v.containsOnlyValue( ( Real ) loops );
+            return Algorithms::containsOnlyValue( v, 1.0 );
+         return Algorithms::containsOnlyValue( v, ( Real ) loops );
       }
 
       void traverseUsingPureC()
@@ -168,7 +174,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 256 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -177,7 +183,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
             for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
             {
                dim3 gridSize;
-               Devices::Cuda::setupGrid(
+               Cuda::setupGrid(
                   blocksCount,
                   gridsCount,
                   gridIdx,
@@ -187,7 +193,7 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
             for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
             {
                dim3 gridSize;
-               Devices::Cuda::setupGrid(
+               Cuda::setupGrid(
                   blocksCount,
                   gridsCount,
                   gridIdx,
@@ -201,11 +207,11 @@ class GridTraversersBenchmark< 1, Device, Real, Index >
       void traverseUsingTraverser()
       {
          // TODO !!!!!!!!!!!!!!!!!!!!!!
-         //traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
+         //traverser.template processAllEntities< AddOneEntitiesProcessorType >
 
-         traverser.template processBoundaryEntities< UserDataType, AddTwoEntitiesProcessorType >
+         traverser.template processBoundaryEntities< AddTwoEntitiesProcessorType >
             ( grid, userData );
-         traverser.template processInteriorEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processInteriorEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
       }
 
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
index 0e9ae7f2f..d310ec451 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h
@@ -13,16 +13,21 @@
 #pragma once
 
 #include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/contains.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/MeshFunctionView.h>
 #include <TNL/Pointers/SharedPointer.h>
 #include "cuda-kernels.h"
+#include "AddOneEntitiesProcessor.h"
+#include "AddTwoEntitiesProcessor.h"
+#include "BenchmarkTraverserUserData.h"
 #include "GridTraversersBenchmark.h"
+#include "GridTraverserBenchmarkHelper.h"
 #include "SimpleCell.h"
 
 namespace TNL {
@@ -35,12 +40,12 @@ template< typename Device,
 class GridTraversersBenchmark< 2, Device, Real, Index >
 {
    public:
-      
+
       using Vector = Containers::Vector< Real, Device, Index >;
       using GridType = Meshes::Grid< 2, Real, Device, Index >;
       using GridPointer = Pointers::SharedPointer< GridType >;
       using Coordinates = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< 2, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -50,11 +55,13 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
       using AddTwoEntitiesProcessorType = AddTwoEntitiesProcessor< UserDataType >;
 
       GridTraversersBenchmark( Index size )
-      :size( size ), v( size * size ), grid( size, size ), u( grid ),
+      :size( size ),
+       v( size * size ),
+       grid( size, size ),
        userData( u )
       {
          v_data = v.getData();
-         u->getData().bind( v );
+         u->bind( grid, v );
       }
 
       void reset()
@@ -74,7 +81,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 16, 16 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -85,7 +92,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
                for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                {
                   dim3 gridSize;
-                  Devices::Cuda::setupGrid(
+                  Cuda::setupGrid(
                      blocksCount,
                      gridsCount,
                      gridIdx,
@@ -103,8 +110,8 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
          {
             data[ j * _size + i ] += (Real) 1.0;
          };
-         
-         ParallelFor2D< Device, AsynchronousMode >::exec(
+
+         Algorithms::ParallelFor2D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             this->size,
@@ -123,8 +130,8 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
             entity.refresh();
             data[ entity.getIndex() ] += (Real) 1.0;
          };
-         
-         ParallelFor2D< Device, AsynchronousMode >::exec(
+
+         Algorithms::ParallelFor2D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             this->size,
@@ -134,7 +141,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
             grid,
             userData,
             size );
-         
+
       }
 
       void addOneUsingParallelForAndMeshFunction()
@@ -150,8 +157,8 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
             //( *_u )( entity ) += (Real) 1.0;
             _u->getData().getData()[ entity.getIndex() ] += (Real) 1.0;
          };
-         
-         ParallelFor2D< Device, AsynchronousMode >::exec(
+
+         Algorithms::ParallelFor2D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             this->size,
@@ -163,9 +170,9 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
       void addOneUsingTraverser()
       {
          using CoordinatesType = typename GridType::CoordinatesType;
-         traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processAllEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
-         
+
          /*Meshes::GridTraverser< Grid >::template processEntities< Cell, WriteOneEntitiesProcessorType, WriteOneTraverserUserDataType, false >(
            grid,
            CoordinatesType( 0 ),
@@ -188,8 +195,8 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
       bool checkAddOne( int loops, bool reseting )
       {
          if( reseting )
-            return v.containsOnlyValue( 1.0 );
-         return v.containsOnlyValue( ( Real ) loops );
+            return Algorithms::containsOnlyValue( v, 1.0 );
+         return Algorithms::containsOnlyValue( v, ( Real ) loops );
       }
 
       void traverseUsingPureC()
@@ -215,7 +222,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 32, 8 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -226,7 +233,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
                for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                {
                   dim3 gridSize;
-                  Devices::Cuda::setupGrid(
+                  Cuda::setupGrid(
                      blocksCount,
                      gridsCount,
                      gridIdx,
@@ -237,7 +244,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
                for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                {
                   dim3 gridSize;
-                  Devices::Cuda::setupGrid(
+                  Cuda::setupGrid(
                      blocksCount,
                      gridsCount,
                      gridIdx,
@@ -250,15 +257,15 @@ class GridTraversersBenchmark< 2, Device, Real, Index >
 
       void traverseUsingTraverser()
       {
-         //traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
-         traverser.template processBoundaryEntities< UserDataType, AddTwoEntitiesProcessorType >
+         //traverser.template processAllEntities< AddOneEntitiesProcessorType >
+         traverser.template processBoundaryEntities< AddTwoEntitiesProcessorType >
             ( grid, userData );
-         traverser.template processInteriorEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processInteriorEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
       }
 
    protected:
-        
+
       Index size;
       Vector v;
       Real* v_data;
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
index 26b6413e4..8e3525366 100644
--- a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h
@@ -13,18 +13,21 @@
 #pragma once
 
 #include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/contains.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 #include <TNL/Meshes/Traverser.h>
-#include <TNL/Functions/MeshFunction.h>
+#include <TNL/Functions/MeshFunctionView.h>
 #include <TNL/Pointers/SharedPointer.h>
 #include "cuda-kernels.h"
 #include "AddOneEntitiesProcessor.h"
+#include "AddTwoEntitiesProcessor.h"
 #include "BenchmarkTraverserUserData.h"
 #include "GridTraversersBenchmark.h"
+#include "GridTraverserBenchmarkHelper.h"
 #include "SimpleCell.h"
 
 namespace TNL {
@@ -42,7 +45,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
       using GridType = Meshes::Grid< 3, Real, Device, Index >;
       using GridPointer = Pointers::SharedPointer< GridType >;
       using Coordinates = typename GridType::CoordinatesType;
-      using MeshFunction = Functions::MeshFunction< GridType >;
+      using MeshFunction = Functions::MeshFunctionView< GridType >;
       using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >;
       using CellType = typename GridType::template EntityType< 3, Meshes::GridEntityNoStencilStorage >;
       using SimpleCellType = SimpleCell< GridType >;
@@ -55,11 +58,10 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
       : size( size ),
         v( size * size * size ),
         grid( size, size, size ),
-        u( grid ),
         userData( u )
       {
          v_data = v.getData();
-         u->getData().bind( v );
+         u->bind( grid, v );
       }
 
       void reset()
@@ -80,7 +82,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 32, 4, 2 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -93,7 +95,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
                   for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                   {
                      dim3 gridSize;
-                     Devices::Cuda::setupGrid(
+                     Cuda::setupGrid(
                         blocksCount,
                         gridsCount,
                         gridIdx,
@@ -111,8 +113,8 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
          {
             data[ ( k * _size + j ) * _size + i ] += (Real) 1.0;
          };
-         
-         ParallelFor3D< Device, AsynchronousMode >::exec(
+
+         Algorithms::ParallelFor3D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             ( Index ) 0,
@@ -135,7 +137,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
             data[ entity.getIndex() ] += (Real) 1.0;
          };
 
-         ParallelFor3D< Device, AsynchronousMode >::exec(
+         Algorithms::ParallelFor3D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             ( Index ) 0,
@@ -165,7 +167,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
             _u->getData().getData()[ entity.getIndex() ] += (Real) 1.0;
          };
 
-         ParallelFor3D< Device, AsynchronousMode >::exec(
+         Algorithms::ParallelFor3D< Device, Algorithms::AsynchronousMode >::exec(
             ( Index ) 0,
             ( Index ) 0,
             ( Index ) 0,
@@ -177,15 +179,15 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
 
       void addOneUsingTraverser()
       {
-         traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processAllEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
       }
 
       bool checkAddOne( int loops, bool reseting )
       {
          if( reseting )
-            return v.containsOnlyValue( 1.0 );
-         return v.containsOnlyValue( ( Real ) loops );
+            return Algorithms::containsOnlyValue( v, 1.0 );
+         return Algorithms::containsOnlyValue( v, ( Real ) loops );
       }
 
       void traverseUsingPureC()
@@ -221,7 +223,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
          {
 #ifdef HAVE_CUDA
             dim3 blockSize( 32, 4, 2 ), blocksCount, gridsCount;
-            Devices::Cuda::setupThreads(
+            Cuda::setupThreads(
                blockSize,
                blocksCount,
                gridsCount,
@@ -234,7 +236,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
                   for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                   {
                      dim3 gridSize;
-                     Devices::Cuda::setupGrid(
+                     Cuda::setupGrid(
                         blocksCount,
                         gridsCount,
                         gridIdx,
@@ -246,7 +248,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
                   for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ )
                   {
                      dim3 gridSize;
-                     Devices::Cuda::setupGrid(
+                     Cuda::setupGrid(
                         blocksCount,
                         gridsCount,
                         gridIdx,
@@ -260,16 +262,16 @@ class GridTraversersBenchmark< 3, Device, Real, Index >
       void traverseUsingTraverser()
       {
          // TODO !!!!!!!!!!!!!!!!!!!!!!
-         //traverser.template processAllEntities< UserDataType, AddOneEntitiesProcessorType >
+         //traverser.template processAllEntities< AddOneEntitiesProcessorType >
 
-         traverser.template processBoundaryEntities< UserDataType, AddTwoEntitiesProcessorType >
+         traverser.template processBoundaryEntities< AddTwoEntitiesProcessorType >
             ( grid, userData );
-         traverser.template processInteriorEntities< UserDataType, AddOneEntitiesProcessorType >
+         traverser.template processInteriorEntities< AddOneEntitiesProcessorType >
             ( grid, userData );
       }
 
    protected:
-      
+
       Index size;
       Vector v;
       Real* v_data;
diff --git a/src/Benchmarks/Traversers/cuda-kernels.h b/src/Benchmarks/Traversers/cuda-kernels.h
index a90baf5b0..d092925bf 100644
--- a/src/Benchmarks/Traversers/cuda-kernels.h
+++ b/src/Benchmarks/Traversers/cuda-kernels.h
@@ -25,7 +25,7 @@ template< typename Real,
           typename Index >
 __global__ void fullGridTraverseKernel1D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( threadIdx_x < size )
       v_data[ threadIdx_x ] += (Real) 1.0;
 }
@@ -34,8 +34,8 @@ template< typename Real,
           typename Index >
 __global__ void fullGridTraverseKernel2D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
    if( threadIdx_x < size && threadIdx_y < size )
       v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0;
 }
@@ -44,21 +44,21 @@ template< typename Real,
           typename Index >
 __global__ void fullGridTraverseKernel3D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   const Index threadIdx_z = ( gridIdx.z * Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
    if( threadIdx_x < size && threadIdx_y < size && threadIdx_z < size )
       v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0;
 }
 
 /****
- * Traversing interior cells 
+ * Traversing interior cells
  */
 template< typename Real,
           typename Index >
 __global__ void interiorTraverseKernel1D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( threadIdx_x > 0 && threadIdx_x < size - 1 )
       v_data[ threadIdx_x ] += (Real) 1.0;
 }
@@ -67,9 +67,9 @@ template< typename Real,
           typename Index >
 __global__ void interiorTraverseKernel2D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   if( threadIdx_x > 0 && threadIdx_y > 0 && 
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   if( threadIdx_x > 0 && threadIdx_y > 0 &&
        threadIdx_x < size - 1 && threadIdx_y < size - 1 )
          v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 1.0;
 }
@@ -78,9 +78,9 @@ template< typename Real,
           typename Index >
 __global__ void interiorTraverseKernel3D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   const Index threadIdx_z = ( gridIdx.z * Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
    if( threadIdx_x > 0 && threadIdx_y > 0 && threadIdx_z > 0 &&
        threadIdx_x < size - 1 && threadIdx_y < size - 1 && threadIdx_z < size - 1 )
       v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 1.0;
@@ -93,7 +93,7 @@ template< typename Real,
           typename Index >
 __global__ void boundariesTraverseKernel1D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
    if( threadIdx_x == 0 || threadIdx_x == size - 1 )
       v_data[ threadIdx_x ] += (Real) 2.0;
 }
@@ -102,9 +102,9 @@ template< typename Real,
           typename Index >
 __global__ void boundariesTraverseKernel2D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   if( threadIdx_x > 0 && threadIdx_y > 0 && 
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   if( threadIdx_x > 0 && threadIdx_y > 0 &&
        threadIdx_x < size - 1 && threadIdx_y < size - 1 )
          v_data[ threadIdx_y * size + threadIdx_x ] += (Real) 2.0;
 }
@@ -113,9 +113,9 @@ template< typename Real,
           typename Index >
 __global__ void boundariesTraverseKernel3D( const Index size, const dim3 gridIdx, Real* v_data  )
 {
-   const Index threadIdx_x = ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   const Index threadIdx_y = ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
-   const Index threadIdx_z = ( gridIdx.z * Devices::Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
+   const Index threadIdx_x = ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   const Index threadIdx_y = ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
+   const Index threadIdx_z = ( gridIdx.z * Cuda::getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
    if( threadIdx_x == 0 || threadIdx_y == 0 || threadIdx_z == 0 ||
        threadIdx_x == size - 1 || threadIdx_y == size - 1 || threadIdx_z == size - 1 )
       v_data[ ( threadIdx_z * size + threadIdx_y ) * size + threadIdx_x ] += (Real) 2.0;
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 30c364ac3..fa058bbcd 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -20,25 +20,26 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Algorithms/ParallelFor.h>
-#include <TNL/Containers/List.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
 using namespace TNL::Benchmarks::Traversers;
 
 
+template< typename T, typename S >
+bool containsValue( const std::vector< T >& container, const S& value )
+{
+   return std::find( container.begin(), container.end(), value ) != container.end();
+}
+
 template< int Dimension,
           typename Real = float,
           typename Index = int >
 bool runBenchmark( const Config::ParameterContainer& parameters,
-                   Benchmark& benchmark,
-                   Benchmark::MetadataMap& metadata )
+                   Benchmark<>& benchmark,
+                   Logging::MetadataMap& metadata )
 {
-   const Containers::List< String >& tests = parameters.getParameter< Containers::List< String > >( "tests" );
-   // FIXME: the --tests is just a string because list does not work with enums
-//   const Containers::List< String >& tests = parameters.getParameter< Containers::List< String > >( "tests" );
-   //Containers::List< String > tests;
-   //tests.Append( parameters.getParameter< String >( "tests" ) );
+   const std::vector< String >& tests = parameters.getParameter< std::vector< String > >( "tests" );
    // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
    // which have a default value. The workaround below works for int values, but it is not possible
    // to pass 64-bit integer values
@@ -49,8 +50,8 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
    const bool withHost = parameters.getParameter< bool >( "with-host" );
 #ifdef HAVE_CUDA
    const bool withCuda = parameters.getParameter< bool >( "with-cuda" );
-#else
-   const bool withCuda = false;
+//#else
+//   const bool withCuda = false;
 #endif
    const bool check = parameters.getParameter< bool >( "check" );
 
@@ -76,14 +77,14 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          cudaTraverserBenchmark.reset();
       };
 #endif
-      benchmark.setMetadataColumns(
-         Benchmark::MetadataColumns( 
-            {  {"size", convertToString( size ) }, } ) );
+      benchmark.setMetadataColumns({
+            {"size", convertToString( size ) },
+      });
 
       /****
        * Add one using pure C code
        */
-      if( tests.containsValue( "all" ) || tests.containsValue( "add-one-pure-c"  ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "add-one-pure-c"  ) )
       {
          benchmark.setOperation( "Pure C", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
 
@@ -93,9 +94,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withHost )
          {
-            benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingPureC );
+            const BenchmarkResult result = benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingPureC );
             if( check && ! hostTraverserBenchmark.checkAddOne(
-                  benchmark.getPerformedLoops(),
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -106,9 +107,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withCuda )
          {
-            benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingPureC );
+            const BenchmarkResult result = benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingPureC );
             if( check && ! cudaTraverserBenchmark.checkAddOne(
-                  benchmark.getPerformedLoops(),
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -118,7 +119,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       /****
        * Add one using parallel for
        */
-      if( tests.containsValue( "all" ) || tests.containsValue( "add-one-parallel-for" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "add-one-parallel-for" ) )
       {
          benchmark.setOperation( "parallel for", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
 
@@ -128,9 +129,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withHost )
          {
-            benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingParallelFor );
-            if( check && ! hostTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingParallelFor );
+            if( check && ! hostTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -142,9 +143,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withCuda )
          {
-            benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingParallelFor );
-            if( check && ! cudaTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingParallelFor );
+            if( check && ! cudaTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -154,7 +155,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       /****
        * Add one using parallel for with grid entity
        */
-      if( tests.containsValue( "all" ) || tests.containsValue( "add-one-simple-cell" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "add-one-simple-cell" ) )
       {
          auto hostAddOneUsingSimpleCell = [&] ()
          {
@@ -163,9 +164,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          benchmark.setOperation( "simple cell", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          if( withHost )
          {
-            benchmark.time< Devices::Host >( hostReset, "CPU", hostAddOneUsingSimpleCell );
-            if( check && ! hostTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Host >( hostReset, "CPU", hostAddOneUsingSimpleCell );
+            if( check && ! hostTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -176,9 +177,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withCuda )
          {
-            benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaAddOneUsingSimpleCell );
-            if( check && ! cudaTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaAddOneUsingSimpleCell );
+            if( check && ! cudaTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -188,7 +189,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       /****
        * Add one using parallel for with mesh function
        */
-      if( tests.containsValue( "all" ) || tests.containsValue( "add-one-parallel-for-and-mesh-function" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "add-one-parallel-for-and-mesh-function" ) )
       {
          auto hostAddOneUsingParallelForAndMeshFunction = [&] ()
          {
@@ -197,9 +198,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          benchmark.setOperation( "par.for+mesh fc.", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          if( withHost )
          {
-            benchmark.time< Devices::Host >( hostReset, "CPU", hostAddOneUsingParallelForAndMeshFunction );
-            if( check && ! hostTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Host >( hostReset, "CPU", hostAddOneUsingParallelForAndMeshFunction );
+            if( check && ! hostTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -210,9 +211,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withCuda )
          {
-            benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaAddOneUsingParallelForAndMeshFunction );
-            if( check && ! cudaTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaAddOneUsingParallelForAndMeshFunction );
+            if( check && ! cudaTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -222,7 +223,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       /****
        * Add one using traverser
        */
-      if( tests.containsValue( "all" ) || tests.containsValue( "add-one-traverser" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "add-one-traverser" ) )
       {
          benchmark.setOperation( "traverser", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          auto hostWriteOneUsingTraverser = [&] ()
@@ -231,9 +232,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withHost )
          {
-            benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingTraverser );
-            if( check && ! hostTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingTraverser );
+            if( check && ! hostTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -245,9 +246,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          };
          if( withCuda )
          {
-            benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingTraverser );
-            if( check && ! cudaTraverserBenchmark.checkAddOne( 
-                  benchmark.getPerformedLoops(),
+            const BenchmarkResult result = benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingTraverser );
+            if( check && ! cudaTraverserBenchmark.checkAddOne(
+                  result.loops,
                   benchmark.isResetingOn() ) )
                benchmark.addErrorMessage( "Test results are not correct." );
          }
@@ -255,10 +256,6 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       }
       std::cout << "--------------------------------------------------------------------------------------------------------" << std::endl;
    }
-   return true;
-      }
-      std::cout << "--------------------------------------------------------------------------------------------------------" << std::endl;
-   }
 
    /****
     * Full grid traversing including boundary conditions
@@ -281,9 +278,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       };
 #endif
 
-      benchmark.setMetadataColumns(
-         Benchmark::MetadataColumns(
-            {  {"size", convertToString( size ) }, } ) );
+      benchmark.setMetadataColumns({
+            {"size", convertToString( size ) },
+      });
 
       /****
        * Write one and two (as BC) using C for
@@ -300,7 +297,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       };
 #endif
 
-      if( tests.containsValue( "all" ) || tests.containsValue( "bc-pure-c" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "bc-pure-c" ) )
       {
          benchmark.setOperation( "Pure C", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          if( withHost )
@@ -335,7 +332,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       };
 #endif
 
-      if( tests.containsValue( "all" ) || tests.containsValue( "bc-parallel-for" ) )
+      if( containsValue( tests, "all" ) || containsValue( tests, "bc-parallel-for" ) )
       {
          benchmark.setOperation( "parallel for", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          if( withHost )
@@ -364,7 +361,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
 //         cudaTraverserBenchmark.addOneUsingParallelFor();
 //      };
 //
-//      if( tests.containsValue( "all" ) || tests.containsValue( "bc-parallel-for" ) )
+//      if( containsValue( tests, "all" ) || containsValue( tests, "bc-parallel-for" ) )
 //      {
 //         benchmark.setOperation( "parallel for", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
 //         if( withHost )
@@ -393,8 +390,8 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
          cudaTraverserBenchmark.addOneUsingTraverser();
       };
 #endif
-      
-      if( tests.containsValue( "all" ) || tests.containsValue( "bc-traverser" ) )
+
+      if( containsValue( tests, "all" ) || containsValue( tests, "bc-traverser" ) )
       {
          benchmark.setOperation( "traverser", 2 * pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
          if( withHost )
@@ -418,11 +415,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
    return true;
 }
 
-void setupConfig( Config::ConfigDescription& config )
+void configSetup( Config::ConfigDescription& config )
 {
-   config.addList< String >( "tests", "Tests to be performed.", "all" );
-   // FIXME: addList does not work with addEntryEnum - ConfigDescription::addEntryEnum throws std::bad_cast
-   // config.addList< String >( "tests", "Tests to be performed.", "all" );
+   config.addList< String >( "tests", "Tests to be performed.", {"all"} );
    config.addEntryEnum( "all" );
    config.addEntryEnum( "add-one-pure-c" );
    config.addEntryEnum( "add-one-parallel-for" );
@@ -443,15 +438,15 @@ void setupConfig( Config::ConfigDescription& config )
    config.addEntryEnum( "append" );
    config.addEntryEnum( "overwrite" );
 
-   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
-   config.addEntryEnum( "float" );
-   config.addEntryEnum( "double" );
-   config.addEntryEnum( "all" );
+//   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
+//   config.addEntryEnum( "float" );
+//   config.addEntryEnum( "double" );
+//   config.addEntryEnum( "all" );
    config.addEntry< int >( "dimension", "Set the problem dimension. 0 means all dimensions 1,2 and 3.", 0 );
    config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 10 );
    config.addEntry< int >( "max-size", "Minimum size of arrays/vectors used in the benchmark.", 1000 );
-   config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
-   Benchmark::configSetup( config );
+//   config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
+   Benchmark<>::configSetup( config );
 
    config.addDelimiter( "Device settings:" );
    Devices::Host::configSetup( config );
@@ -463,12 +458,12 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
 {
    const String & logFileName = parameters.getParameter< String >( "log-file" );
    const String & outputMode = parameters.getParameter< String >( "output-mode" );
-   const String & precision = parameters.getParameter< String >( "precision" );
-   const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
+//   const String & precision = parameters.getParameter< String >( "precision" );
+//   const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
 
-   Benchmark benchmark; //( loops, verbose );
+   Benchmark<> benchmark; //( loops, verbose );
    benchmark.setup( parameters );
-   Benchmark::MetadataMap metadata = getHardwareMetadata();
+   Logging::MetadataMap metadata = getHardwareMetadata();
    runBenchmark< Dimension >( parameters, benchmark, metadata );
 
    auto mode = std::ios::out;
@@ -488,15 +483,15 @@ int main( int argc, char* argv[] )
 {
    Config::ConfigDescription config;
    Config::ParameterContainer parameters;
-   
-   setupConfig( config );
+
+   configSetup( config );
    if( ! parseCommandLine( argc, argv, config, parameters ) )
       return EXIT_FAILURE;
 
    if( ! Devices::Host::setup( parameters ) ||
        ! Devices::Cuda::setup( parameters ) )
       return EXIT_FAILURE;
-   
+
    const int dimension = parameters.getParameter< int >( "dimension" );
    bool status( false );
    if( ! dimension )
-- 
GitLab


From 64fec656efbb64ce1bc29035aa76e8d8da6475ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 18:53:58 +0100
Subject: [PATCH 19/40] Benchmarks: removed useless class FunctionTimer, plain
 function timeFunction remains

---
 src/Benchmarks/Benchmarks.hpp  |   5 +-
 src/Benchmarks/FunctionTimer.h | 106 ++++++++++++++++-----------------
 2 files changed, 53 insertions(+), 58 deletions(-)

diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index 4a085ca2f..ce615c432 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -146,7 +146,6 @@ time( ResetFunction reset,
 {
    result.time = std::numeric_limits<double>::quiet_NaN();
    result.stddev = std::numeric_limits<double>::quiet_NaN();
-   FunctionTimer< Device > functionTimer;
 
    // run the monitor main loop
    Solvers::SolverMonitorThread monitor_thread( monitor );
@@ -157,10 +156,10 @@ time( ResetFunction reset,
    std::string errorMessage;
    try {
       if( this->reset )
-         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, reset, loops, minTime, monitor );
+         std::tie( result.loops, result.time, result.stddev ) = timeFunction< Device >( compute, reset, loops, minTime, monitor );
       else {
          auto noReset = [] () {};
-         std::tie( result.loops, result.time, result.stddev ) = functionTimer.timeFunction( compute, noReset, loops, minTime, monitor );
+         std::tie( result.loops, result.time, result.stddev ) = timeFunction< Device >( compute, noReset, loops, minTime, monitor );
       }
    }
    catch ( const std::exception& e ) {
diff --git a/src/Benchmarks/FunctionTimer.h b/src/Benchmarks/FunctionTimer.h
index 010b7ed00..4cadf60b4 100644
--- a/src/Benchmarks/FunctionTimer.h
+++ b/src/Benchmarks/FunctionTimer.h
@@ -23,73 +23,69 @@
 namespace TNL {
 namespace Benchmarks {
 
-template< typename Device >
-class FunctionTimer
+// returns a tuple of (loops, mean, stddev) where loops is the number of
+// performed loops (i.e. timing samples), mean is the arithmetic mean of the
+// computation times and stddev is the sample standard deviation
+template< typename Device,
+          typename ComputeFunction,
+          typename ResetFunction,
+          typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
+std::tuple< int, double, double >
+timeFunction( ComputeFunction compute,
+              ResetFunction reset,
+              int maxLoops,
+              const double& minTime,
+              Monitor && monitor = Monitor() )
 {
-public:
-   // returns a tuple of (loops, mean, stddev) where loops is the number of
-   // performed loops (i.e. timing samples), mean is the arithmetic mean of the
-   // computation times and stddev is the sample standard deviation
-   template< typename ComputeFunction,
-             typename ResetFunction,
-             typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
-   std::tuple< int, double, double >
-   timeFunction( ComputeFunction compute,
-                 ResetFunction reset,
-                 int maxLoops,
-                 const double& minTime,
-                 Monitor && monitor = Monitor() )
-   {
-      // the timer is constructed zero-initialized and stopped
-      Timer timer;
+   // the timer is constructed zero-initialized and stopped
+   Timer timer;
 
-      // set timer to the monitor
-      monitor.setTimer( timer );
+   // set timer to the monitor
+   monitor.setTimer( timer );
 
-      // warm up
-      reset();
-      compute();
+   // warm up
+   reset();
+   compute();
 
-      Containers::Vector< double > results( maxLoops );
-      results.setValue( 0.0 );
+   Containers::Vector< double > results( maxLoops );
+   results.setValue( 0.0 );
 
-      int loops;
-      for( loops = 0;
-           loops < maxLoops || sum( results ) < minTime;
-           loops++ )
-      {
-         // abuse the monitor's "time" for loops
-         monitor.setTime( loops + 1 );
-         reset();
+   int loops;
+   for( loops = 0;
+        loops < maxLoops || sum( results ) < minTime;
+        loops++ )
+   {
+      // abuse the monitor's "time" for loops
+      monitor.setTime( loops + 1 );
+      reset();
 
-         // Explicit synchronization of the CUDA device
+      // Explicit synchronization of the CUDA device
 #ifdef HAVE_CUDA
-         if( std::is_same< Device, Devices::Cuda >::value )
-            cudaDeviceSynchronize();
+      if( std::is_same< Device, Devices::Cuda >::value )
+         cudaDeviceSynchronize();
 #endif
 
-         // reset timer before each computation
-         timer.reset();
-         timer.start();
-         compute();
+      // reset timer before each computation
+      timer.reset();
+      timer.start();
+      compute();
 #ifdef HAVE_CUDA
-         if( std::is_same< Device, Devices::Cuda >::value )
-            cudaDeviceSynchronize();
+      if( std::is_same< Device, Devices::Cuda >::value )
+         cudaDeviceSynchronize();
 #endif
-         timer.stop();
-
-         results[ loops ] = timer.getRealTime();
-      }
-
-      const double mean = sum( results ) / (double) loops;
-      double stddev;
-      if( loops > 1 )
-         stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
-      else
-         stddev = std::numeric_limits<double>::quiet_NaN();
-      return std::make_tuple( loops, mean, stddev );
+      timer.stop();
+
+      results[ loops ] = timer.getRealTime();
    }
-};
+
+   const double mean = sum( results ) / (double) loops;
+   double stddev;
+   if( loops > 1 )
+      stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
+   else
+      stddev = std::numeric_limits<double>::quiet_NaN();
+   return std::make_tuple( loops, mean, stddev );
+}
 
 } // namespace Benchmarks
 } // namespace TNL
-- 
GitLab


From c31b22688a00cec30bf4014e6da6c5900e8675d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 19:07:03 +0100
Subject: [PATCH 20/40] Benchmarks: moved plain functions into Utils.h

---
 src/Benchmarks/Benchmarks.h    |  59 -------------
 src/Benchmarks/Benchmarks.hpp  |   2 +-
 src/Benchmarks/CMakeLists.txt  |   2 +-
 src/Benchmarks/FunctionTimer.h |  91 --------------------
 src/Benchmarks/Utils.h         | 150 +++++++++++++++++++++++++++++++++
 5 files changed, 152 insertions(+), 152 deletions(-)
 delete mode 100644 src/Benchmarks/FunctionTimer.h
 create mode 100644 src/Benchmarks/Utils.h

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 898ba294e..e8c277f52 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -20,12 +20,6 @@
 #include <TNL/String.h>
 #include <TNL/Solvers/IterativeSolverMonitor.h>
 
-#include <TNL/Devices/Host.h>
-#include <TNL/SystemInfo.h>
-#include <TNL/Cuda/DeviceInfo.h>
-#include <TNL/Config/ConfigDescription.h>
-#include <TNL/MPI/Wrappers.h>
-
 namespace TNL {
 namespace Benchmarks {
 
@@ -176,59 +170,6 @@ class Benchmark
       SolverMonitorType monitor;
 };
 
-
-inline typename Logging::MetadataMap getHardwareMetadata()
-{
-   const int cpu_id = 0;
-   const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id );
-   String cacheInfo = convertToString( cacheSizes.L1data ) + ", "
-                       + convertToString( cacheSizes.L1instruction ) + ", "
-                       + convertToString( cacheSizes.L2 ) + ", "
-                       + convertToString( cacheSizes.L3 );
-#ifdef HAVE_CUDA
-   const int activeGPU = Cuda::DeviceInfo::getActiveDevice();
-   const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
-                             convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) );
-#endif
-
-#ifdef HAVE_MPI
-   int nproc = 1;
-   // check if MPI was initialized (some benchmarks do not initialize MPI even when
-   // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly)
-   if( TNL::MPI::Initialized() )
-      nproc = TNL::MPI::GetSize();
-#endif
-
-   typename Logging::MetadataMap metadata {
-       { "host name", SystemInfo::getHostname() },
-       { "architecture", SystemInfo::getArchitecture() },
-       { "system", SystemInfo::getSystemName() },
-       { "system release", SystemInfo::getSystemRelease() },
-       { "start time", SystemInfo::getCurrentTime() },
-#ifdef HAVE_MPI
-       { "number of MPI processes", convertToString( nproc ) },
-#endif
-       { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) },
-       { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) },
-       { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) },
-       { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) },
-       { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) },
-       { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) },
-       { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
-#ifdef HAVE_CUDA
-       { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) },
-       { "GPU architecture", deviceArch },
-       { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) },
-       { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) },
-       { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) },
-       { "GPU memory clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) },
-       { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) },
-#endif
-   };
-
-   return metadata;
-}
-
 } // namespace Benchmarks
 } // namespace TNL
 
diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index ce615c432..dffb93278 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -14,7 +14,7 @@
 #pragma once
 
 #include "Benchmarks.h"
-#include "FunctionTimer.h"
+#include "Utils.h"
 
 #include <iostream>
 #include <exception>
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 5d0cc9212..f379b690a 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -11,10 +11,10 @@ add_subdirectory( Traversers )
 set( headers
          Benchmarks.h
          Benchmarks.hpp
-         FunctionTimer.h
          Logging.h
          CustomLogging.h
          JsonLogging.h
+         Utils.h
 )
 
 install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
diff --git a/src/Benchmarks/FunctionTimer.h b/src/Benchmarks/FunctionTimer.h
deleted file mode 100644
index 4cadf60b4..000000000
--- a/src/Benchmarks/FunctionTimer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/***************************************************************************
-                          FunctionTimer.h  -  description
-                             -------------------
-    begin                : Dec 25, 2018
-    copyright            : (C) 2018 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky,
-//                 Tomas Oberhuber
-
-#pragma once
-
-#include <tuple>
-
-#include <TNL/Timer.h>
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Solvers/IterativeSolverMonitor.h>
-
-namespace TNL {
-namespace Benchmarks {
-
-// returns a tuple of (loops, mean, stddev) where loops is the number of
-// performed loops (i.e. timing samples), mean is the arithmetic mean of the
-// computation times and stddev is the sample standard deviation
-template< typename Device,
-          typename ComputeFunction,
-          typename ResetFunction,
-          typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
-std::tuple< int, double, double >
-timeFunction( ComputeFunction compute,
-              ResetFunction reset,
-              int maxLoops,
-              const double& minTime,
-              Monitor && monitor = Monitor() )
-{
-   // the timer is constructed zero-initialized and stopped
-   Timer timer;
-
-   // set timer to the monitor
-   monitor.setTimer( timer );
-
-   // warm up
-   reset();
-   compute();
-
-   Containers::Vector< double > results( maxLoops );
-   results.setValue( 0.0 );
-
-   int loops;
-   for( loops = 0;
-        loops < maxLoops || sum( results ) < minTime;
-        loops++ )
-   {
-      // abuse the monitor's "time" for loops
-      monitor.setTime( loops + 1 );
-      reset();
-
-      // Explicit synchronization of the CUDA device
-#ifdef HAVE_CUDA
-      if( std::is_same< Device, Devices::Cuda >::value )
-         cudaDeviceSynchronize();
-#endif
-
-      // reset timer before each computation
-      timer.reset();
-      timer.start();
-      compute();
-#ifdef HAVE_CUDA
-      if( std::is_same< Device, Devices::Cuda >::value )
-         cudaDeviceSynchronize();
-#endif
-      timer.stop();
-
-      results[ loops ] = timer.getRealTime();
-   }
-
-   const double mean = sum( results ) / (double) loops;
-   double stddev;
-   if( loops > 1 )
-      stddev = 1.0 / std::sqrt( loops - 1 ) * l2Norm( results - mean );
-   else
-      stddev = std::numeric_limits<double>::quiet_NaN();
-   return std::make_tuple( loops, mean, stddev );
-}
-
-} // namespace Benchmarks
-} // namespace TNL
diff --git a/src/Benchmarks/Utils.h b/src/Benchmarks/Utils.h
new file mode 100644
index 000000000..e60adc229
--- /dev/null
+++ b/src/Benchmarks/Utils.h
@@ -0,0 +1,150 @@
+/***************************************************************************
+                          Utils.h  -  description
+                             -------------------
+    begin                : Dec 25, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Jakub Klinkovsky,
+//                 Tomas Oberhuber
+
+#pragma once
+
+#include <tuple>
+#include <map>
+
+#include <TNL/Timer.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Solvers/IterativeSolverMonitor.h>
+
+#include <TNL/Devices/Host.h>
+#include <TNL/SystemInfo.h>
+#include <TNL/Cuda/DeviceInfo.h>
+#include <TNL/Config/ConfigDescription.h>
+#include <TNL/MPI/Wrappers.h>
+
+namespace TNL {
+namespace Benchmarks {
+
+// Times `compute` (with an untimed `reset` before each run) for at least maxLoops samples
+// and until the summed time reaches minTime. Returns a tuple (loops, mean, stddev): number
+// of samples, their arithmetic mean, and the sample standard deviation (NaN if loops < 2).
+template< typename Device,
+          typename ComputeFunction,
+          typename ResetFunction,
+          typename Monitor = TNL::Solvers::IterativeSolverMonitor< double, int > >
+std::tuple< int, double, double >
+timeFunction( ComputeFunction compute,
+              ResetFunction reset,
+              int maxLoops,
+              const double& minTime,
+              Monitor && monitor = Monitor() )
+{
+   // the timer is constructed zero-initialized and stopped
+   Timer timer;
+
+   // set timer to the monitor
+   monitor.setTimer( timer );
+
+   // warm up
+   reset();
+   compute();
+
+   // running statistics (Welford's algorithm) instead of a buffer of maxLoops samples:
+   // the loop may run past maxLoops while the total is below minTime, which overflowed it
+   double total = 0.0, runningMean = 0.0, squaredDiffs = 0.0;
+   int loops;
+   for( loops = 0;
+        loops < maxLoops || total < minTime;
+        loops++ )
+   {
+      // abuse the monitor's "time" for loops
+      monitor.setTime( loops + 1 );
+      reset();
+
+      // Explicit synchronization of the CUDA device
+#ifdef HAVE_CUDA
+      if( std::is_same< Device, Devices::Cuda >::value )
+         cudaDeviceSynchronize();
+#endif
+
+      // reset timer before each computation
+      timer.reset();
+      timer.start();
+      compute();
+#ifdef HAVE_CUDA
+      if( std::is_same< Device, Devices::Cuda >::value )
+         cudaDeviceSynchronize();
+#endif
+      timer.stop();
+
+      const double sample = timer.getRealTime();
+      const double delta = sample - runningMean;
+      total += sample;
+      runningMean += delta / ( loops + 1 );
+      squaredDiffs += delta * ( sample - runningMean );  // accumulates sum of squared deviations, always >= 0
+   }
+
+   const double mean = total / (double) loops;
+   const double stddev = loops > 1 ? std::sqrt( squaredDiffs / ( loops - 1 ) ) : std::numeric_limits<double>::quiet_NaN();
+   return std::make_tuple( loops, mean, stddev );
+}
+
+inline std::map< std::string, std::string > getHardwareMetadata()  // gathers host, CPU and (when HAVE_MPI / HAVE_CUDA are defined) MPI and GPU properties as strings
+{
+   const int cpu_id = 0;  // every CPU query below is made for CPU 0 only
+   const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id );
+   const std::string cacheInfo = std::to_string( cacheSizes.L1data ) + ", "
+                               + std::to_string( cacheSizes.L1instruction ) + ", "
+                               + std::to_string( cacheSizes.L2 ) + ", "
+                               + std::to_string( cacheSizes.L3 );
+#ifdef HAVE_CUDA
+   const int activeGPU = Cuda::DeviceInfo::getActiveDevice();
+   const std::string deviceArch = std::to_string( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "."
+                                + std::to_string( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) );
+#endif
+
+#ifdef HAVE_MPI
+   int nproc = 1;
+   // check if MPI was initialized (some benchmarks do not initialize MPI even when
+   // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly)
+   if( TNL::MPI::Initialized() )
+      nproc = TNL::MPI::GetSize();
+#endif
+
+   std::map< std::string, std::string > metadata {  // std::map iterates in key order, not in the order listed here
+       { "host name", SystemInfo::getHostname() },
+       { "architecture", SystemInfo::getArchitecture() },
+       { "system", SystemInfo::getSystemName() },
+       { "system release", SystemInfo::getSystemRelease() },
+       { "start time", SystemInfo::getCurrentTime() },
+#ifdef HAVE_MPI
+       { "number of MPI processes", std::to_string( nproc ) },
+#endif
+       { "OpenMP enabled", std::to_string( Devices::Host::isOMPEnabled() ) },  // NOTE(review): std::to_string has no bool overload; a bool result is logged as "1"/"0" - confirm this is intended
+       { "OpenMP threads", std::to_string( Devices::Host::getMaxThreadsCount() ) },
+       { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) },
+       { "CPU cores", std::to_string( SystemInfo::getNumberOfCores( cpu_id ) ) },
+       { "CPU threads per core", std::to_string( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) },  // NOTE(review): presumably integer division; a reported core count of 0 would divide by zero - confirm
+       { "CPU max frequency (MHz)", std::to_string( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) },  // presumably converts kHz to MHz - verify the unit returned by SystemInfo
+       { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
+#ifdef HAVE_CUDA
+       { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) },
+       { "GPU architecture", deviceArch },  // formatted as "<major>.<minor>"
+       { "GPU CUDA cores", std::to_string( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) },
+       { "GPU clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) },
+       { "GPU global memory (GB)", std::to_string( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) },
+       { "GPU memory clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) },
+       { "GPU memory ECC enabled", std::to_string( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) },
+#endif
+   };
+
+   return metadata;
+}
+
+} // namespace Benchmarks
+} // namespace TNL
-- 
GitLab


From 8703690cce9e5dc8b9389f5db8359b9598a040e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Fri, 5 Nov 2021 20:48:51 +0100
Subject: [PATCH 21/40] Benchmarks: refactored JSON logging format

Instead of writing the whole log as a valid JSON object, it is much
easier to write one JSON object per line. This can be processed in
Python with the Pandas package using the following code:

df = pandas.read_json(open("foo.log", "r"), orient="records", lines=True)

Also removed the outputMode parameter from the Benchmark and Logging
classes, since it was pretty useless.
---
 src/Benchmarks/Benchmarks.h              |  5 +-
 src/Benchmarks/Benchmarks.hpp            |  7 +--
 src/Benchmarks/CustomLogging.h           |  9 +---
 src/Benchmarks/JsonLogging.h             | 68 +++++++-----------------
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h | 14 +----
 5 files changed, 26 insertions(+), 77 deletions(-)

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index e8c277f52..c258143c2 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -67,10 +67,7 @@ class Benchmark
       using MetadataColumns = typename Logger::MetadataColumns;
       using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;
 
-      Benchmark( int loops = 10,
-               bool verbose = true,
-               String outputMode = "",
-               bool logFileAppend = false );
+      Benchmark( int loops = 10, bool verbose = true );
 
       static void configSetup( Config::ConfigDescription& config );
 
diff --git a/src/Benchmarks/Benchmarks.hpp b/src/Benchmarks/Benchmarks.hpp
index dffb93278..3205ebb5d 100644
--- a/src/Benchmarks/Benchmarks.hpp
+++ b/src/Benchmarks/Benchmarks.hpp
@@ -25,11 +25,8 @@ namespace Benchmarks {
 
 template< typename Logger >
 Benchmark< Logger >::
-Benchmark( int loops,
-           bool verbose,
-           String outputMode,
-           bool logFileAppend )
-: logger(verbose, outputMode, logFileAppend), loops(loops)
+Benchmark( int loops, bool verbose )
+: logger(verbose), loops(loops)
 {}
 
 template< typename Logger >
diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index 384300b76..4a902e68d 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -23,11 +23,8 @@ class CustomLogging
 : public Logging
 {
 public:
-   CustomLogging( int verbose = true,
-                  std::string outputMode = "",
-                  bool logFileAppend = false )
-   : Logging(verbose), outputMode( outputMode )
-   {}
+   // inherit constructors
+   using Logging::Logging;
 
    virtual void
    writeTitle( const std::string & title ) override
@@ -224,8 +221,6 @@ protected:
 
    MetadataColumns metadataColumns;
    bool header_changed = true;
-
-   std::string outputMode;
 };
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index 423e10072..cb3af155e 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -23,18 +23,12 @@ class JsonLogging
 : public Logging
 {
 public:
-   JsonLogging( int verbose = true,
-                std::string outputMode = "",
-                bool logFileAppend = false )
-   : Logging(verbose), outputMode( outputMode ), logFileAppend( logFileAppend )
-   {}
+   // inherit constructors
+   using Logging::Logging;
 
    virtual void
    writeTitle( const std::string & title ) override
    {
-      if( outputMode == "append" )
-         return;
-
       if( verbose )
          std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
    }
@@ -42,19 +36,12 @@ public:
    virtual void
    writeMetadata( const MetadataMap & metadata ) override
    {
-      if( outputMode == "append" )
-         return;
-
-      if( verbose )
+      if( verbose ) {
          std::cout << "properties:" << std::endl;
-
-      for( auto & it : metadata ) {
-         if( verbose )
+         for( auto & it : metadata )
             std::cout << "   " << it.first << " = " << it.second << std::endl;
-      }
-
-      if( verbose )
          std::cout << std::endl;
+      }
    }
 
    virtual void setMetadataColumns( const MetadataColumns& elements ) override
@@ -113,10 +100,8 @@ public:
    {
       TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
       TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
-      if( this->lineStarted )
-         log << "," << std::endl;
 
-      log << "      {" << std::endl;
+      log << "{";
 
       // write common logs
       int idx( 0 );
@@ -125,8 +110,8 @@ public:
          if( verbose )
             std::cout << std::setw( 20 ) << lg.second;
          if( idx++ > 0 )
-            log << "," << std::endl;
-         log << "         \"" << lg.first << "\" : \"" << lg.second << "\"";
+            log << ", ";
+         log << "\"" << lg.first << "\": \"" << lg.second << "\"";
       }
 
       std::size_t i = 0;
@@ -135,17 +120,16 @@ public:
          if( verbose )
             std::cout << std::setw( widths[ i ] ) << el;
          if( idx++ > 0 )
-            log << "," << std::endl;
-         log << "         \"" << headerElements[ i ] << "\" : \"" << el << "\"";
+            log << ", ";
+         log << "\"" << headerElements[ i ] << "\": \"" << el << "\"";
          i++;
       }
       if( ! errorMessage.empty() ) {
          if( idx++ > 0 )
-            log << "," << std::endl;
-         log << "         \"error\" : \"" << errorMessage << "\"";
+            log << ", ";
+         log << "\"error\": \"" << errorMessage << "\"";
       }
-      log << std::endl << "      }";
-      this->lineStarted = true;
+      log << "}" << std::endl;
       if( verbose )
          std::cout << std::endl;
    }
@@ -164,26 +148,22 @@ public:
    virtual void
    writeErrorMessage( const std::string& message ) override
    {
-      if( this->lineStarted )
-         log << "," << std::endl;
-
-      log << "      {" << std::endl;
+      log << "{";
 
       // write common logs
       int idx( 0 );
       for( auto lg : this->metadataColumns )
       {
          if( idx++ > 0 )
-            log << "," << std::endl;
-         log << "         \"" << lg.first << "\" : \"" << lg.second << "\"";
+            log << ", ";
+         log << "\"" << lg.first << "\": \"" << lg.second << "\"";
       }
 
       if( idx++ > 0 )
-         log << "," << std::endl;
-      log << "         \"error\" : \"" << message << "\"";
+         log << ", ";
+      log << "\"error\": \"" << message << "\"";
 
-      log << std::endl << "      }";
-      this->lineStarted = true;
+      log << "}" << std::endl;
    }
 
    virtual void
@@ -194,11 +174,6 @@ public:
 
    virtual bool save( std::ostream & logFile ) override
    {
-      if( ! this->logFileAppend )
-      {
-         logFile << "{" << std::endl;
-         logFile << "   \"results\" : [ " << std::endl;
-      }
       logFile << log.str();
       if( logFile.good() ) {
          log.str() = "";
@@ -225,11 +200,6 @@ protected:
 
    MetadataColumns metadataColumns;
    bool header_changed = true;
-
-   std::string outputMode;
-
-   bool lineStarted = false;
-   bool logFileAppend = false;
 };
 
 } // namespace Benchmarks
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 0f3f29b1d..639ff7390 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -80,10 +80,9 @@ setupConfig( Config::ConfigDescription & config )
    config.addEntry< bool >( "with-legacy-matrices", "Perform benchmark even for legacy TNL matrix formats.", true );
    config.addEntry< bool >( "with-all-cpu-tests", "All matrix formats are tested on both CPU and GPU. ", false );
    config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv::" + getCurrDateTime() + ".log");
-   config.addEntry< String >( "output-mode", "Mode for opening the log file - 'close' will only finalize the log file.", "append" );
+   config.addEntry< String >( "output-mode", "Mode for opening the log file.", "append" );
    config.addEntryEnum( "append" );
    config.addEntryEnum( "overwrite" );
-   config.addEntryEnum( "close" );
    config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
    config.addEntryEnum( "float" );
    config.addEntryEnum( "double" );
@@ -134,22 +133,13 @@ main( int argc, char* argv[] )
    const int verboseMR = parameters.getParameter< int >( "verbose-MReader" );
 
    // open log file
-   if( outputMode == "close" )
-   {
-      std::fstream file;
-      file.open( logFileName.getString(), std::ios::out | std::ios::app );
-      file << std::endl << "   ]" << std::endl << "}";
-      return EXIT_SUCCESS;
-   }
    if( inputFileName == "" )
    {
       std::cerr << "ERROR: Input file name is required." << std::endl;
       return EXIT_FAILURE;
    }
-   bool logFileAppend( false );
    if( std::experimental::filesystem::exists(logFileName.getString()) )
    {
-      logFileAppend = true;
       std::cout << "Log file " << logFileName << " exists and ";
       if( outputMode == "append" )
          std::cout << "new logs will be appended." << std::endl;
@@ -163,7 +153,7 @@ main( int argc, char* argv[] )
    std::ofstream logFile( logFileName.getString(), mode );
 
    // init benchmark and common metadata
-   TNL::Benchmarks::SpMV::BenchmarkType benchmark( loops, verbose, outputMode, logFileAppend );
+   TNL::Benchmarks::SpMV::BenchmarkType benchmark( loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
-- 
GitLab


From 554db9e5041c58fd5fa2d30a7a558ecd758ba93e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sat, 6 Nov 2021 12:03:57 +0100
Subject: [PATCH 22/40] Benchmarks: optimized building of the SpMV benchmark
 using explicit template instantiation

The build time improved from 4:48 to 1:43 on 16 cores, so the efficiency
is not great, but it's still better than nothing. Maybe the benchmarks
could be reorganized into more independent tasks that could be
explicitly instantiated more efficiently (even if there would be fewer
explicit instantiations eventually).
---
 scripts/eti.py                                |  74 ++++++++
 src/Benchmarks/SpMV/CMakeLists.txt            |  18 +-
 .../ReferenceFormats/LightSpMVBenchmark.h     |   4 +-
 src/Benchmarks/SpMV/eti.py                    |   1 +
 src/Benchmarks/SpMV/spmv.h                    | 158 +++++++++++++++---
 .../SpMV/spmv.templates/spmv.t0.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t0.cu |   8 +
 .../SpMV/spmv.templates/spmv.t1.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t1.cu |   8 +
 .../SpMV/spmv.templates/spmv.t2.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t2.cu |   8 +
 .../SpMV/spmv.templates/spmv.t3.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t3.cu |   8 +
 .../SpMV/spmv.templates/spmv.t4.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t4.cu |   8 +
 .../SpMV/spmv.templates/spmv.t5.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t5.cu |   8 +
 .../SpMV/spmv.templates/spmv.t6.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t6.cu |   8 +
 .../SpMV/spmv.templates/spmv.t7.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t7.cu |   8 +
 .../SpMV/spmv.templates/spmv.t8.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t8.cu |   8 +
 .../SpMV/spmv.templates/spmv.t9.cpp           |   8 +
 src/Benchmarks/SpMV/spmv.templates/spmv.t9.cu |   8 +
 25 files changed, 384 insertions(+), 31 deletions(-)
 create mode 100755 scripts/eti.py
 create mode 120000 src/Benchmarks/SpMV/eti.py
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t0.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t0.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t1.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t1.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t2.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t2.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t3.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t3.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t4.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t4.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t5.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t5.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t6.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t6.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t7.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t7.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t8.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t8.cu
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t9.cpp
 create mode 100644 src/Benchmarks/SpMV/spmv.templates/spmv.t9.cu

diff --git a/scripts/eti.py b/scripts/eti.py
new file mode 100755
index 000000000..89d78876d
--- /dev/null
+++ b/scripts/eti.py
@@ -0,0 +1,74 @@
+#! /usr/bin/env python3
+
+import os.path
+import pathlib
+import re
+import sys
+
+if len(sys.argv) != 2:
+    print(f"usage: {sys.argv[0]} FILE\n\nwhere FILE is a C++ source code or header file.", file=sys.stderr)
+    sys.exit(1)
+if not os.path.isfile(sys.argv[1]):
+    print(f"error: {sys.argv[1]} is not a valid file.", file=sys.stderr)
+    sys.exit(1)
+# src is used exactly as given on the command line (absolute or relative)
+src = sys.argv[1]
+basename = os.path.splitext(os.path.basename(src))[0]
+dirname = f"{basename}.templates"
+# dirname has no directory component, so it is resolved against the current working directory
+if not os.path.isdir(dirname):
+    os.mkdir(dirname)
+
+def get_source_code(namespaces, extern_template_instantiation):
+    # Returns the text of one generated source file: an #include of src, then
+    # the instantiation (first "extern " removed) wrapped in the namespaces.
+    # The closing lines are emitted in the same order as the opening lines.
+    # An absolute src is included verbatim (e.g. when called by CMake, because
+    # a relative include does not work with its separate build dir structure);
+    # a relative src is included by its path relative to dirname.
+    if src == os.path.abspath(src):
+        include_path = src
+    else:
+        include_path = os.path.relpath(src, dirname)
+    definition = extern_template_instantiation.strip().replace("extern ", "", 1)
+    parts = [f"#include \"{include_path}\"\n"]
+    parts.extend(f"namespace {ns} {{\n" for ns in namespaces)
+    parts.append(definition + "\n")
+    parts.extend(f"}} // namespace {ns}\n" for ns in namespaces)
+    return "".join(parts)
+
+def check_write(content, fname):
+    # Write content to fname unless fname is already a file whose text equals
+    # content (compared after stripping surrounding whitespace).
+    if os.path.isfile(fname):
+        # read through a context manager so the handle is closed right away
+        with open(fname, "r") as current:
+            if current.read().strip() == content.strip():
+                return
+    with open(fname, "w") as out:
+        out.write(content)
+
+i = 0
+namespaces = []
+file_names = set()
+for line in open(src).readlines():
+    # heuristic namespace tracking: push on "namespace X {", pop on "} // namespace X"
+    ns_begin = re.search(r"^\s*namespace\s+(\w+)\s*\{$", line)
+    if ns_begin:
+        namespaces.append(ns_begin.group(1))
+    ns_end = re.search(r"^\s*\}\s*\/\/\s*namespace\s+(\w+)$", line)
+    if ns_end:
+        namespaces.pop(-1)
+    # each line starting with "extern template" yields one .cpp and one .cu file numbered by i
+    if line.strip().startswith("extern template"):
+        source_code = get_source_code(namespaces, line)
+        for ext in ["cpp", "cu"]:
+            fname = f"{dirname}/{basename}.t{i}.{ext}"
+            check_write(source_code, fname)
+            file_names.add(fname)
+        i += 1
+
+# remove every entry of the target directory that was not generated in this run
+for path in pathlib.Path(dirname).iterdir():
+    if str(path) not in file_names:
+        path.unlink()
diff --git a/src/Benchmarks/SpMV/CMakeLists.txt b/src/Benchmarks/SpMV/CMakeLists.txt
index 93dccab0d..499808853 100644
--- a/src/Benchmarks/SpMV/CMakeLists.txt
+++ b/src/Benchmarks/SpMV/CMakeLists.txt
@@ -9,15 +9,17 @@
 #include( cmake/BuildCSR5.cmake )
 
 if( BUILD_CUDA )
-    cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
-    message( STATUS ${CXX_BENCHMARKS_FLAGS} )
-    CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${PETSC_LINKER_FLAGS})
+   file( GLOB EXPLICIT_TEMPLATES spmv.templates/*.cu )
+   cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
+   cuda_add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cu ${EXPLICIT_TEMPLATES} ReferenceFormats/LightSpMV-1.0/SpMV.cu ReferenceFormats/LightSpMV-1.0/SpMVCSR.cu
+                        OPTIONS ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
+   target_link_libraries( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${PETSC_LINKER_FLAGS})
 else()
-    ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
-    target_compile_options( tnl-benchmark-spmv  PRIVATE ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
-    target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
-    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${PETSC_LINKER_FLAGS} )
+   file( GLOB EXPLICIT_TEMPLATES spmv.templates/*.cpp )
+   add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cpp ${EXPLICIT_TEMPLATES} )
+   target_compile_options( tnl-benchmark-spmv  PRIVATE ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
+   target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
+   target_link_libraries( tnl-benchmark-spmv ${PETSC_LINKER_FLAGS} )
 endif()
 
 install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
diff --git a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
index 7d6ffde49..1d6e34320 100644
--- a/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
+++ b/src/Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h
@@ -17,8 +17,8 @@
 #pragma push
 #pragma diag_suppress = 1444
 #include "LightSpMV-1.0/SpMV.h"
-#include "LightSpMV-1.0/SpMV.cu"
-#include "LightSpMV-1.0/SpMVCSR.cu"
+//#include "LightSpMV-1.0/SpMV.cu"
+//#include "LightSpMV-1.0/SpMVCSR.cu"
 #pragma pop
 #endif
 #include <TNL/Matrices/SparseMatrix.h>
diff --git a/src/Benchmarks/SpMV/eti.py b/src/Benchmarks/SpMV/eti.py
new file mode 120000
index 000000000..c59a420e1
--- /dev/null
+++ b/src/Benchmarks/SpMV/eti.py
@@ -0,0 +1 @@
+../../../scripts/eti.py
\ No newline at end of file
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 70ed76652..a568ee351 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -43,12 +43,6 @@
 #include <petscmat.h>
 #endif
 
-// Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation
-#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
-#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
-#define WITH_TNL_BENCHMARK_SPMV_BINARY_MATRICES
-#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
-
 // Uncomment the following line to enable benchmarking the sandbox sparse matrix.
 //#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
 #ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
@@ -197,8 +191,7 @@ template< typename Real, typename Device, typename Index >
 using SlicedEllpackAlias = Benchmarks::SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >;
 
 template< typename Real,
-          template< typename, typename, typename > class Matrix,
-          template< typename, typename, typename, typename > class Vector = Containers::Vector >
+          template< typename, typename, typename > class Matrix >
 void
 benchmarkSpMVLegacy( BenchmarkType& benchmark,
                      const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
@@ -713,7 +706,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
    csrHostMatrix.reset();
 
    bool allCpuTests = parameters.getParameter< bool >( "with-all-cpu-tests" );
-#ifdef WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
    /////
    // Benchmarking of TNL legacy formats
    //
@@ -735,12 +727,10 @@ benchmarkSpmv( BenchmarkType& benchmark,
       benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkSpMVLegacy< Real, Legacy::ChunkedEllpack                    >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkSpMVLegacy< Real, Legacy::BiEllpack                         >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+      // AdEllpack is broken
+      //benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
    }
-   // AdEllpack is broken
-   //benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
-#endif
 
-#ifdef WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
    /////
    // Benchmarking TNL formats
    //
@@ -756,7 +746,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
    benchmarkSpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack                >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkSpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkSpMV< Real, HostMatrixType, SparseMatrix_BiEllpack                    >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#ifdef WITH_TNL_BENCHMARK_SPMV_BINARY_MATRICES
    benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_CSR_Scalar              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_CSR_Vector              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkSpMVCSRLight< Real, HostMatrixType, SparseMatrix_CSR_Light, bool >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
@@ -765,14 +754,11 @@ benchmarkSpmv( BenchmarkType& benchmark,
    benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack           >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack          >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
    benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_BiEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#endif
 #ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
-   benchmarkSpMV< Real, HostMatrixType, SparseSandboxMatrix                       >( benchmark, hostMatrix, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseSandboxMatrix                       >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
 #endif
    hostMatrix.reset();
-#endif
 
-#ifdef WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
    /////
    // Benchmarking symmetric sparse matrices
    //
@@ -806,7 +792,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
       benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack                >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack                     >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#ifdef WITH_TNL_BENCHMARK_SPMV_BINARY_MATRICES
       benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       //benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid            >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
@@ -816,11 +801,142 @@ benchmarkSpmv( BenchmarkType& benchmark,
       benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack           >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack          >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
       benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack               >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#endif
    }
-#endif
 }
 
+// =============== EXPLICIT TEMPLATE INSTANTIATIONS ===============
+// The explicit template declarations (extern ...) are converted to definitions
+// in separate source files using the eti.py script. The developer should call
+// this script whenever the declarations are changed and commit the generated
+// definitions in the git repository.
+//
+// IMPORTANT:
+// - Each template instantiation must be written on exactly one line (the code
+//   generator script (eti.py) does not support parsing multiple lines).
+// - Make sure that all "benchmark*" functions that are called above are
+//   instantiated below.
+// - Also make sure that all functions that are explicitly instantiated below
+//   are actually used.
+// - Explicit template instantiations cannot be guarded by #ifdef (the code
+//   generator script (eti.py) does not support parsing macros).
+
+extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Scalar             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Vector             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light              >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light2             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light3             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light4             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light5             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light6             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Adaptive           >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_MultiVector        >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_LightWithoutAtomic >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::Ellpack         >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, SlicedEllpackAlias                        >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::ChunkedEllpack  >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::BiEllpack       >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+// AdEllpack is broken
+//extern template void benchmarkSpMV< float, Matrices::AdEllpack              >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
+
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Scalar                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Vector                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Light            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Adaptive                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_Ellpack                      >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_SlicedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_ChunkedEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_BiEllpack                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseSandboxMatrix                       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+#endif
+
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light             >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive                  >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack                     >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+
+
+extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Scalar             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Vector             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light              >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light2             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light3             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light4             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light5             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light6             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Adaptive           >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_MultiVector        >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_LightWithoutAtomic >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::Ellpack         >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, SlicedEllpackAlias                        >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::ChunkedEllpack  >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::BiEllpack       >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+// AdEllpack is broken
+//extern template void benchmarkSpMV< double, Matrices::AdEllpack              >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
+
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Scalar                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Vector                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Light            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Adaptive                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_Ellpack                      >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_SlicedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_ChunkedEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_BiEllpack                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseSandboxMatrix                       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+#endif
+
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light             >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive                  >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack                     >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+//extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+
       } // namespace SpMV
    } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cpp
new file mode 100644
index 000000000..fafd4b2b0
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchLegacy< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cu
new file mode 100644
index 000000000..fafd4b2b0
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t0.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchLegacy< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cpp
new file mode 100644
index 000000000..fbd1d792a
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchLegacy< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cu
new file mode 100644
index 000000000..fbd1d792a
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t1.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchLegacy< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cpp
new file mode 100644
index 000000000..bf67f96e1
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cu
new file mode 100644
index 000000000..bf67f96e1
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t2.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cpp
new file mode 100644
index 000000000..3d11252c9
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cu
new file mode 100644
index 000000000..3d11252c9
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t3.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cpp
new file mode 100644
index 000000000..735f853e7
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSpMV< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cu
new file mode 100644
index 000000000..735f853e7
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t4.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSpMV< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cpp
new file mode 100644
index 000000000..e880c7d58
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSpMV< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cu
new file mode 100644
index 000000000..e880c7d58
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t5.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSpMV< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cpp
new file mode 100644
index 000000000..082fe7967
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetric< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cu
new file mode 100644
index 000000000..082fe7967
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t6.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetric< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cpp
new file mode 100644
index 000000000..a1191d34d
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetric< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cu
new file mode 100644
index 000000000..a1191d34d
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t7.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetric< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cpp
new file mode 100644
index 000000000..fea28e004
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetricBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cu
new file mode 100644
index 000000000..fea28e004
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t8.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetricBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cpp b/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cpp
new file mode 100644
index 000000000..16c242a79
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cpp
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetricBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
diff --git a/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cu b/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cu
new file mode 100644
index 000000000..16c242a79
--- /dev/null
+++ b/src/Benchmarks/SpMV/spmv.templates/spmv.t9.cu
@@ -0,0 +1,8 @@
+#include "../spmv.h"
+namespace TNL {
+namespace Benchmarks {
+namespace SpMV {
+template void dispatchSymmetricBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+} // namespace SpMV
+} // namespace Benchmarks
+} // namespace TNL
-- 
GitLab


From 1f28c5c9c0957363acf625cb0ada9de1810b2cdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sat, 6 Nov 2021 13:24:43 +0100
Subject: [PATCH 23/40] Benchmarks: reorganized dispatching of SpMV benchmarks
 to optimize compilation work load

The performance on 16 cores is about the same as before, but it takes
significantly fewer tasks, so it should be much faster on fewer cores.
---
 src/Benchmarks/SpMV/spmv.h | 360 ++++++++++++++++---------------------
 1 file changed, 156 insertions(+), 204 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index a568ee351..d4abd419d 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -272,8 +272,7 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
 
 template< typename Real,
           typename InputMatrix,
-          template< typename, typename, typename > class Matrix,
-          template< typename, typename, typename, typename > class Vector = Containers::Vector >
+          template< typename, typename, typename > class Matrix >
 void
 benchmarkSpMV( BenchmarkType& benchmark,
                const InputMatrix& inputMatrix,
@@ -355,8 +354,7 @@ benchmarkSpMV( BenchmarkType& benchmark,
 template< typename Real,
           typename InputMatrix,
           template< typename, typename, typename > class Matrix,
-          typename TestReal = Real,
-          template< typename, typename, typename, typename > class Vector = Containers::Vector >
+          typename TestReal = Real >
 void
 benchmarkSpMVCSRLight( BenchmarkType& benchmark,
                        const InputMatrix& inputMatrix,
@@ -458,8 +456,7 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
 
 template< typename Real,
           typename InputMatrix,
-          template< typename, typename, typename > class Matrix,
-          template< typename, typename, typename, typename > class Vector = Containers::Vector >
+          template< typename, typename, typename > class Matrix >
 void
 benchmarkBinarySpMV( BenchmarkType& benchmark,
                      const InputMatrix& inputMatrix,
@@ -538,6 +535,138 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
  #endif
 }
 
+template< typename Real >  // Runs benchmarkSpMVLegacy for every enabled legacy matrix format.
+void
+dispatchLegacy( BenchmarkType& benchmark,
+                const TNL::Containers::Vector< Real, Devices::Host, int >& hostOutVector,
+                const String& inputFileName,
+                bool allCpuTests,
+                bool verboseMR )
+{
+   using namespace Benchmarks::SpMV::ReferenceFormats;
+   benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Scalar             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Vector             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light              >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light2             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light3             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light4             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light5             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light6             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Adaptive           >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_MultiVector        >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_LightWithoutAtomic >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, Legacy::Ellpack                           >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, Legacy::ChunkedEllpack                    >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVLegacy< Real, Legacy::BiEllpack                         >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   // AdEllpack is broken. NOTE(review): the disabled call below does not match the argument list used by the calls above - update it before re-enabling.
+   //benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
+}
+
+template< typename Real, typename HostMatrix >  // Runs benchmarkBinarySpMV on hostMatrix for every enabled sparse format.
+void
+dispatchBinary( BenchmarkType& benchmark,
+                const HostMatrix& hostMatrix,
+                const TNL::Containers::Vector< Real, Devices::Host, int >& hostOutVector,
+                const String& inputFileName,
+                bool allCpuTests,
+                bool verboseMR )
+{
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_CSR_Scalar              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_CSR_Vector              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVCSRLight< Real, HostMatrix, SparseMatrix_CSR_Light, bool >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );  // CSR Light uses its own benchmark routine, here with TestReal = bool
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_CSR_Adaptive            >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_Ellpack                 >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_SlicedEllpack           >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_ChunkedEllpack          >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, HostMatrix, SparseMatrix_BiEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+}
+
+template< typename Real >  // Reads inputFileName into a host sparse matrix and runs every enabled SpMV benchmark on it.
+void
+dispatchSpMV( BenchmarkType& benchmark,
+              const TNL::Containers::Vector< Real, Devices::Host, int >& hostOutVector,
+              const String& inputFileName,
+              bool allCpuTests,
+              bool verboseMR )
+{
+   using HostMatrixType = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host >;
+   HostMatrixType hostMatrix;
+   TNL::Matrices::MatrixReader< HostMatrixType >::readMtx( inputFileName, hostMatrix, verboseMR );  // read once; the same hostMatrix is passed to every benchmark below
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Scalar                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Vector                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Hybrid                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVCSRLight< Real, HostMatrixType, SparseMatrix_CSR_Light            >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Adaptive                 >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_Ellpack                      >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack                >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_BiEllpack                    >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   dispatchBinary< Real >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );  // HostMatrix is deduced as HostMatrixType
+#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
+   benchmarkSpMV< Real, HostMatrixType, SparseSandboxMatrix                       >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+#endif
+}
+
+template< typename Real, typename SymmetricInputMatrix >  // Runs benchmarkBinarySpMV on symmetricHostMatrix for every enabled symmetric sparse format.
+void
+dispatchSymmetricBinary( BenchmarkType& benchmark,
+                         const SymmetricInputMatrix& symmetricHostMatrix,
+                         const TNL::Containers::Vector< Real, Devices::Host, int >& hostOutVector,
+                         const String& inputFileName,
+                         bool allCpuTests,
+                         bool verboseMR )
+{
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid            >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVCSRLight< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Light, bool       >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );  // CSR Light uses its own benchmark routine, here with TestReal = bool
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Adaptive            >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_Ellpack                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack           >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack          >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack               >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+}
+
+template< typename Real >  // Reads inputFileName as a symmetric sparse matrix and runs the symmetric-format SpMV benchmarks on it.
+void
+dispatchSymmetric( BenchmarkType& benchmark,
+                   const TNL::Containers::Vector< Real, Devices::Host, int >& hostOutVector,
+                   const String& inputFileName,  // .mtx file passed to MatrixReader::readMtx below
+                   bool allCpuTests,
+                   bool verboseMR )  // also forwarded to readMtx as its third argument
+{
+   using SymmetricInputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix >;
+   using InputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >;
+   SymmetricInputMatrix symmetricHostMatrix;
+   try
+   {
+      TNL::Matrices::MatrixReader< SymmetricInputMatrix >::readMtx( inputFileName, symmetricHostMatrix, verboseMR );
+   }
+   catch(const std::exception& e)  // reading into the symmetric type failed: report and skip all symmetric benchmarks for this file
+   {
+      std::cerr << e.what() << " ... SKIPPING " << std::endl;
+      return;
+   }
+   InputMatrix hostMatrix;  // NOTE(review): only referenced by the commented-out comparison below
+   TNL::Matrices::MatrixReader< InputMatrix >::readMtx( inputFileName, hostMatrix, verboseMR );  // NOTE(review): second read of the same file, not guarded by try/catch, result currently unused
+   // TODO: Comparison of symmetric and general matrix does not work yet.
+   //if( hostMatrix != symmetricHostMatrix )
+   //{
+   //   std::cerr << "ERROR: Symmetric matrices do not match !!!" << std::endl;
+   //}
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar                    >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector                    >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   //benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid                   >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMVCSRLight< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Light             >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );  // CSR Light is benchmarked through its own entry point
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Adaptive                  >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_Ellpack                       >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack                >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack                     >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
+   dispatchSymmetricBinary< Real >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );  // binary variants run last, on the same loaded matrix
+}
+
 template< typename Real = double,
           typename Index = int >
 void
@@ -710,98 +839,18 @@ benchmarkSpmv( BenchmarkType& benchmark,
    // Benchmarking of TNL legacy formats
    //
    if( parameters.getParameter< bool >("with-legacy-matrices") )
-   {
-      using namespace Benchmarks::SpMV::ReferenceFormats;
-      benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Scalar             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Vector             >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light              >( benchmark, hostOutVector, inputFileName, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light2             >( benchmark, hostOutVector, inputFileName, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light3             >( benchmark, hostOutVector, inputFileName, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light4             >( benchmark, hostOutVector, inputFileName, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light5             >( benchmark, hostOutVector, inputFileName, verboseMR );
-      //benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Light6             >( benchmark, hostOutVector, inputFileName, verboseMR );
-      benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_Adaptive           >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_MultiVector        >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, SparseMatrixLegacy_CSR_LightWithoutAtomic >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, Legacy::Ellpack                           >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, Legacy::ChunkedEllpack                    >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVLegacy< Real, Legacy::BiEllpack                         >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      // AdEllpack is broken
-      //benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
-   }
+      dispatchLegacy< Real >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
 
    /////
    // Benchmarking TNL formats
    //
-   using HostMatrixType = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host >;
-   HostMatrixType hostMatrix;
-   TNL::Matrices::MatrixReader< HostMatrixType >::readMtx( inputFileName, hostMatrix, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Scalar                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Vector                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   //benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Hybrid                   >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMVCSRLight< Real, HostMatrixType, SparseMatrix_CSR_Light            >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_CSR_Adaptive                 >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_Ellpack                      >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack                >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMV< Real, HostMatrixType, SparseMatrix_BiEllpack                    >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_CSR_Scalar              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_CSR_Vector              >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkSpMVCSRLight< Real, HostMatrixType, SparseMatrix_CSR_Light, bool >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_CSR_Adaptive            >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_Ellpack                 >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_SlicedEllpack           >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_ChunkedEllpack          >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   benchmarkBinarySpMV< Real, HostMatrixType, SparseMatrix_BiEllpack               >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
-   benchmarkSpMV< Real, HostMatrixType, SparseSandboxMatrix                       >( benchmark, hostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-#endif
-   hostMatrix.reset();
+   dispatchSpMV< Real >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
 
    /////
    // Benchmarking symmetric sparse matrices
    //
    if( parameters.getParameter< bool >("with-symmetric-matrices") )
-   {
-      using SymmetricInputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix >;
-      using InputMatrix = TNL::Matrices::SparseMatrix< Real, TNL::Devices::Host, int >;
-      SymmetricInputMatrix symmetricHostMatrix;
-      try
-      {
-         TNL::Matrices::MatrixReader< SymmetricInputMatrix >::readMtx( inputFileName, symmetricHostMatrix, verboseMR );
-      }
-      catch(const std::exception& e)
-      {
-         std::cerr << e.what() << " ... SKIPPING " << std::endl;
-         return;
-      }
-      InputMatrix hostMatrix;
-      TNL::Matrices::MatrixReader< InputMatrix >::readMtx( inputFileName, hostMatrix, verboseMR );
-      // TODO: Comparison of symmetric and general matrix does not work yet.
-      //if( hostMatrix != symmetricHostMatrix )
-      //{
-      //   std::cerr << "ERROR: Symmetric matrices do not match !!!" << std::endl;
-      //}
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar                    >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector                    >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      //benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid                   >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVCSRLight< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Light             >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Adaptive                  >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_Ellpack                       >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack                >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack                     >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Scalar              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Vector              >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      //benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Hybrid            >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkSpMVCSRLight< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Light, bool       >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_CSR_Adaptive            >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_Ellpack                 >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_SlicedEllpack           >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_ChunkedEllpack          >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-      benchmarkBinarySpMV< Real, SymmetricInputMatrix, SymmetricSparseMatrix_BiEllpack               >( benchmark, symmetricHostMatrix, hostOutVector, inputFileName, allCpuTests, verboseMR );
-   }
+      dispatchSymmetric< Real >( benchmark, hostOutVector, inputFileName, allCpuTests, verboseMR );
 }
 
 // =============== EXPLICIT TEMPLATE INSTANTIATIONS ===============
@@ -813,129 +862,32 @@ benchmarkSpmv( BenchmarkType& benchmark,
 // IMPORTANT:
 // - Each template instantiation must be written on exactly one line (the code
 //   generator script (spmv.py) does not support parsing multiple lines).
-// - Make sure that all "benchmark*" functions that are called above are
+// - Make sure that all "dispatch*" functions that are called above are
 //   instantiated below.
 // - Also make sure that all functions that are explicitly instantiated below
 //   are actually used.
 // - Explicit template instantiations cannot be guarded by #ifdef (the code
 //   generator script (spmv.py) does not support parsing macros).
+// - For optimum compilation performance, the explicitly instantiated functions
+//   should be as independent as possible. The compilation of each explicit
+//   instantiation should take about the same time so that the work load in a
+//   parallel build is balanced. Functions that are not instantiated explicitly
+//   will be compiled in the main unit that is compiled serially.
 
-extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Scalar             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Vector             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light              >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light2             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light3             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light4             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light5             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Light6             >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_Adaptive           >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_MultiVector        >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, SparseMatrixLegacy_CSR_LightWithoutAtomic >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::Ellpack         >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, SlicedEllpackAlias                        >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::ChunkedEllpack  >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< float, ReferenceFormats::Legacy::BiEllpack       >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-// AdEllpack is broken
-//extern template void benchmarkSpMV< float, Matrices::AdEllpack              >( BenchmarkType&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool );
-
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Scalar                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Vector                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Light            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Adaptive                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_Ellpack                      >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_SlicedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_ChunkedEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_BiEllpack                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host >, SparseSandboxMatrix                       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-#endif
+extern template void dispatchLegacy< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchLegacy< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
 
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector                    >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light             >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive                  >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack                     >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< float, Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
-
-
-extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Scalar             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Vector             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light              >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light2             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light3             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light4             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light5             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-//extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Light6             >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_Adaptive           >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_MultiVector        >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, SparseMatrixLegacy_CSR_LightWithoutAtomic >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::Ellpack         >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, SlicedEllpackAlias                        >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::ChunkedEllpack  >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVLegacy< double, ReferenceFormats::Legacy::BiEllpack       >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-// AdEllpack is broken
-//extern template void benchmarkSpMV< double, Matrices::AdEllpack              >( BenchmarkType&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool );
-
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Scalar                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Vector                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Light            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Adaptive                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_Ellpack                      >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_SlicedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_ChunkedEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_BiEllpack                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-#ifdef WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host >, SparseSandboxMatrix                       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-#endif
+extern template void dispatchBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+
+extern template void dispatchSpMV< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchSpMV< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+
+extern template void dispatchSymmetric< float >( BenchmarkType&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchSymmetric< double >( BenchmarkType&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
 
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector                    >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid                   >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light             >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive                  >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack                >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack                     >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Scalar              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Vector              >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-//extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Hybrid            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkSpMVCSRLight< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Light, bool       >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_CSR_Adaptive            >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_Ellpack                 >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_SlicedEllpack           >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_ChunkedEllpack          >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
-extern template void benchmarkBinarySpMV< double, Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >, SymmetricSparseMatrix_BiEllpack               >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host, int, Matrices::SymmetricMatrix >&, const TNL::Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchSymmetricBinary< float >( BenchmarkType&, const Matrices::SparseMatrix< float, Devices::Host >&, const Containers::Vector< float, Devices::Host, int >&, const String&, bool, bool );
+extern template void dispatchSymmetricBinary< double >( BenchmarkType&, const Matrices::SparseMatrix< double, Devices::Host >&, const Containers::Vector< double, Devices::Host, int >&, const String&, bool, bool );
 
       } // namespace SpMV
    } // namespace Benchmarks
-- 
GitLab


From 03c20cfe2b097d30b2b500058f7f3b805938eae8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 7 Nov 2021 21:34:36 +0100
Subject: [PATCH 24/40] Benchmarks: added logging of the performer to the JSON
 format

---
 src/Benchmarks/CustomLogging.h | 6 +++---
 src/Benchmarks/JsonLogging.h   | 3 ++-
 src/Benchmarks/Logging.h       | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Benchmarks/CustomLogging.h b/src/Benchmarks/CustomLogging.h
index 4a902e68d..7f5dafeb9 100644
--- a/src/Benchmarks/CustomLogging.h
+++ b/src/Benchmarks/CustomLogging.h
@@ -154,7 +154,7 @@ public:
    }
 
    virtual void
-   logResult( const std::string& spanningElement,
+   logResult( const std::string& performer,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
               const WidthHints& columnWidthHints,
@@ -162,8 +162,8 @@ public:
    {
       TNL_ASSERT_EQ( headerElements.size(), rowElements.size(), "elements must have equal sizes" );
       TNL_ASSERT_EQ( headerElements.size(), columnWidthHints.size(), "elements must have equal sizes" );
-      writeTableHeader( spanningElement, headerElements );
-      writeTableRow( spanningElement, rowElements, errorMessage );
+      writeTableHeader( performer, headerElements );
+      writeTableRow( performer, rowElements, errorMessage );
    }
 
    virtual void
diff --git a/src/Benchmarks/JsonLogging.h b/src/Benchmarks/JsonLogging.h
index cb3af155e..5ed52a13e 100644
--- a/src/Benchmarks/JsonLogging.h
+++ b/src/Benchmarks/JsonLogging.h
@@ -135,12 +135,13 @@ public:
    }
 
    virtual void
-   logResult( const std::string& spanningElement,
+   logResult( const std::string& performer,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
               const WidthHints& columnWidthHints,
               const std::string& errorMessage = "" ) override
    {
+      setMetadataElement({ "performer", performer });
       writeHeader( headerElements, columnWidthHints );
       writeRow( headerElements, rowElements, columnWidthHints, errorMessage );
    }
diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h
index 99edf022e..2e27195e7 100644
--- a/src/Benchmarks/Logging.h
+++ b/src/Benchmarks/Logging.h
@@ -111,7 +111,7 @@ public:
                                     int insertPosition = -1 /* negative values insert from the end */ ) = 0;
 
    virtual void
-   logResult( const std::string& spanningElement,
+   logResult( const std::string& performer,
               const HeaderElements& headerElements,
               const RowElements& rowElements,
               const WidthHints& columnWidthHints,
-- 
GitLab


From adb055efa858b9e714711c7afc9b2958ed3a2e42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 7 Nov 2021 10:42:26 +0100
Subject: [PATCH 25/40] Moved common headers from Benchmarks to TNL/Benchmarks
 so they can be used in other projects more easily

---
 src/Benchmarks/BLAS/array-operations.h                |  3 +--
 src/Benchmarks/BLAS/dense-mv.h                        |  2 +-
 src/Benchmarks/BLAS/spmv.h                            |  2 +-
 src/Benchmarks/BLAS/triad.h                           |  2 +-
 src/Benchmarks/BLAS/vector-operations.h               |  2 +-
 src/Benchmarks/CMakeLists.txt                         | 11 -----------
 .../DistSpMV/tnl-benchmark-distributed-spmv.h         |  2 +-
 src/Benchmarks/LinearSolvers/benchmarks.h             |  2 +-
 .../LinearSolvers/tnl-benchmark-linear-solvers.h      |  2 +-
 .../NDArray/tnl-benchmark-ndarray-boundary.h          |  2 +-
 src/Benchmarks/NDArray/tnl-benchmark-ndarray.h        |  2 +-
 src/Benchmarks/ODESolvers/benchmarks.h                |  2 +-
 src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h |  2 +-
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h             |  2 +-
 src/Benchmarks/SpMV/spmv.h                            |  4 ++--
 src/Benchmarks/Traversers/tnl-benchmark-traversers.h  |  2 +-
 src/{ => TNL}/Benchmarks/Benchmarks.h                 |  0
 src/{ => TNL}/Benchmarks/Benchmarks.hpp               |  0
 src/{ => TNL}/Benchmarks/CustomLogging.h              |  0
 src/{ => TNL}/Benchmarks/JsonLogging.h                |  0
 src/{ => TNL}/Benchmarks/Logging.h                    |  0
 src/{ => TNL}/Benchmarks/Utils.h                      |  0
 22 files changed, 16 insertions(+), 28 deletions(-)
 rename src/{ => TNL}/Benchmarks/Benchmarks.h (100%)
 rename src/{ => TNL}/Benchmarks/Benchmarks.hpp (100%)
 rename src/{ => TNL}/Benchmarks/CustomLogging.h (100%)
 rename src/{ => TNL}/Benchmarks/JsonLogging.h (100%)
 rename src/{ => TNL}/Benchmarks/Logging.h (100%)
 rename src/{ => TNL}/Benchmarks/Utils.h (100%)

diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h
index 1bffd770a..271819de7 100644
--- a/src/Benchmarks/BLAS/array-operations.h
+++ b/src/Benchmarks/BLAS/array-operations.h
@@ -14,8 +14,7 @@
 
 #include <cstring>
 
-#include "../Benchmarks.h"
-
+#include <TNL/Benchmarks/Benchmarks.h>
 #include <TNL/Containers/Array.h>
 
 namespace TNL {
diff --git a/src/Benchmarks/BLAS/dense-mv.h b/src/Benchmarks/BLAS/dense-mv.h
index b3e4834a5..7b1bb1c6d 100644
--- a/src/Benchmarks/BLAS/dense-mv.h
+++ b/src/Benchmarks/BLAS/dense-mv.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 #include "cublasWrappers.h"
 
 #include <TNL/Containers/Vector.h>
diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
index a6fcc30d7..6f70e34d9 100644
--- a/src/Benchmarks/BLAS/spmv.h
+++ b/src/Benchmarks/BLAS/spmv.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 #include <TNL/Pointers/DevicePointer.h>
 #include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h>
diff --git a/src/Benchmarks/BLAS/triad.h b/src/Benchmarks/BLAS/triad.h
index d2bdf12cf..b466e5652 100644
--- a/src/Benchmarks/BLAS/triad.h
+++ b/src/Benchmarks/BLAS/triad.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 #include <TNL/Containers/Array.h>
 #include <TNL/Allocators/CudaHost.h>
diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h
index c2a3ceab3..1d9937de8 100644
--- a/src/Benchmarks/BLAS/vector-operations.h
+++ b/src/Benchmarks/BLAS/vector-operations.h
@@ -15,7 +15,7 @@
 #include <stdlib.h> // srand48
 #include <numeric>  // std::partial_sum
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/scan.h>
diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index f379b690a..0fc8e0f02 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -7,14 +7,3 @@ add_subdirectory( LinearSolvers )
 add_subdirectory( ODESolvers )
 add_subdirectory( Sorting )
 add_subdirectory( Traversers )
-
-set( headers
-         Benchmarks.h
-         Benchmarks.hpp
-         Logging.h
-         CustomLogging.h
-         JsonLogging.h
-         Utils.h
-)
-
-install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index e43c509f5..260615b5b 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -27,7 +27,7 @@
 #include "Legacy/DistributedMatrix.h"
 #include <TNL/Matrices/SparseOperations.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 #include "ordering.h"
 
 #include <TNL/Matrices/SparseMatrix.h>
diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h
index 2d2fe825a..33395b04d 100644
--- a/src/Benchmarks/LinearSolvers/benchmarks.h
+++ b/src/Benchmarks/LinearSolvers/benchmarks.h
@@ -5,7 +5,7 @@
 #include <TNL/Solvers/IterativeSolverMonitor.h>
 #include <TNL/Matrices/DistributedMatrix.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 #ifdef HAVE_ARMADILLO
 #include <armadillo>
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 53175efe8..b4e3c2f64 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -40,7 +40,7 @@
 #include <TNL/Solvers/Linear/BICGStabL.h>
 #include <TNL/Solvers/Linear/UmfpackWrapper.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 #include "../DistSpMV/ordering.h"
 #include "benchmarks.h"
 
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
index 9400a473f..70fe0420f 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
@@ -18,7 +18,7 @@
 
 #include <TNL/Containers/NDArray.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
index 094fbd1af..de3ccdbb5 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
@@ -20,7 +20,7 @@
 #include <TNL/Containers/NDArray.h>
 #include <TNL/Containers/ndarray/Operations.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 using namespace TNL;
 using namespace TNL::Benchmarks;
diff --git a/src/Benchmarks/ODESolvers/benchmarks.h b/src/Benchmarks/ODESolvers/benchmarks.h
index f27d6962e..c901b6694 100644
--- a/src/Benchmarks/ODESolvers/benchmarks.h
+++ b/src/Benchmarks/ODESolvers/benchmarks.h
@@ -15,7 +15,7 @@
 #include <TNL/Pointers/SharedPointer.h>
 #include <TNL/Config/ParameterContainer.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 #include <stdexcept>  // std::runtime_error
 
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index 3f091438f..acfba1ef0 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -28,7 +28,7 @@
 #include <TNL/Solvers/ODE/Euler.h>
 #include <TNL/Solvers/ODE/Merson.h>
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 #include "benchmarks.h"
 #include "SimpleProblem.h"
 #include "Euler.h"
diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index f04d197ea..2231613d9 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -10,7 +10,7 @@
 
 #pragma once
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 
 namespace TNL {
 namespace Benchmarks {
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index d4abd419d..11743dadf 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -16,8 +16,8 @@
 
 #include <cstdint>
 
-#include "../Benchmarks.h"
-#include "../JsonLogging.h"
+#include <TNL/Benchmarks/Benchmarks.h>
+#include <TNL/Benchmarks/JsonLogging.h>
 #include "SpmvBenchmarkResult.h"
 
 #include <TNL/Pointers/DevicePointer.h>
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index fa058bbcd..6ea3ef517 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "../Benchmarks.h"
+#include <TNL/Benchmarks/Benchmarks.h>
 //#include "grid-traversing.h"
 #include "GridTraversersBenchmark.h"
 
diff --git a/src/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
similarity index 100%
rename from src/Benchmarks/Benchmarks.h
rename to src/TNL/Benchmarks/Benchmarks.h
diff --git a/src/Benchmarks/Benchmarks.hpp b/src/TNL/Benchmarks/Benchmarks.hpp
similarity index 100%
rename from src/Benchmarks/Benchmarks.hpp
rename to src/TNL/Benchmarks/Benchmarks.hpp
diff --git a/src/Benchmarks/CustomLogging.h b/src/TNL/Benchmarks/CustomLogging.h
similarity index 100%
rename from src/Benchmarks/CustomLogging.h
rename to src/TNL/Benchmarks/CustomLogging.h
diff --git a/src/Benchmarks/JsonLogging.h b/src/TNL/Benchmarks/JsonLogging.h
similarity index 100%
rename from src/Benchmarks/JsonLogging.h
rename to src/TNL/Benchmarks/JsonLogging.h
diff --git a/src/Benchmarks/Logging.h b/src/TNL/Benchmarks/Logging.h
similarity index 100%
rename from src/Benchmarks/Logging.h
rename to src/TNL/Benchmarks/Logging.h
diff --git a/src/Benchmarks/Utils.h b/src/TNL/Benchmarks/Utils.h
similarity index 100%
rename from src/Benchmarks/Utils.h
rename to src/TNL/Benchmarks/Utils.h
-- 
GitLab


From a0af8b32a08c5f45e4107354be6525aab73c2377 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 7 Nov 2021 21:44:49 +0100
Subject: [PATCH 26/40] SpMV benchmark: implemented logging of errors and
 refactored logging of metadata

Fixes #62
---
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h | 24 +++---
 src/Benchmarks/SpMV/spmv.h                | 97 +++++++++++++----------
 2 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 2231613d9..0ea788348 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -37,44 +37,38 @@ struct SpmvBenchmarkResult
    using BenchmarkResult::time;
 
 
-   SpmvBenchmarkResult( const String& format,
-                        const HostVector& csrResult,
-                        const BenchmarkVector& benchmarkResult,
-                        const IndexType nonzeros )
-   : format( format ), csrResult( csrResult ), benchmarkResult( benchmarkResult ), nonzeros( nonzeros ){};
+   SpmvBenchmarkResult( const HostVector& csrResult,
+                        const BenchmarkVector& benchmarkResult )
+   : csrResult( csrResult ), benchmarkResult( benchmarkResult )
+   {}
 
    virtual HeaderElements getTableHeader() const override
    {
-      return HeaderElements({ "format", "device", "non-zeros", "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2" });
+      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2" });
    }
 
    virtual std::vector< int > getColumnWidthHints() const override
    {
-      return std::vector< int >({ 35, 12, 12, 12, 12, 14, 12, 12, 14, 14 });
+      return std::vector< int >({ 12, 12, 14, 12, 12, 14, 14 });
    }
 
-   void setFormat( const String& format ) { this->format = format; };
-
    virtual RowElements getRowElements() const override
    {
       HostVector benchmarkResultCopy;
       benchmarkResultCopy = benchmarkResult;
       auto diff = csrResult - benchmarkResultCopy;
       RowElements elements;
-      elements << format
-               << ( std::is_same< Device, Devices::Host >::value ? "CPU" : "GPU" )
-               << nonzeros << time << stddev << stddev/time << bandwidth;
+      elements << time << stddev << stddev/time << bandwidth;
       if( speedup != 0.0 )
          elements << speedup;
-      else elements << "N/A";
+      else
+         elements << "N/A";
       elements << max( abs( diff ) ) << lpNorm( diff, 2.0 );
       return elements;
    }
 
-   String format;
    const HostVector& csrResult;
    const BenchmarkVector& benchmarkResult;
-   const IndexType nonzeros;
 };
 
 } //namespace Benchmarks
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 11743dadf..794f4d9c7 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -204,6 +204,8 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
    using HostVector = Containers::Vector< Real, Devices::Host, int >;
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
+   benchmark.setMetadataElement({ "format", MatrixInfo< HostMatrix >::getFormat() });
+
    HostMatrix hostMatrix;
    CudaMatrix cudaMatrix;
 
@@ -213,12 +215,12 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to read the matrix: " << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to read the matrix: " + String(e.what()) );
       return;
    }
 
-   const int elements = hostMatrix.getNonzeroElementsCount();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const int nonzeros = hostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) nonzeros * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setDatasetSize( datasetSize );
 
    /////
@@ -237,7 +239,7 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
          hostMatrix.vectorProduct( hostInVector, hostOutVector );
 
       };
-      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
+      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
       benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
    }
 
@@ -251,7 +253,7 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to copy the matrix on GPU: " << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to copy the matrix on GPU: " + String(e.what()) );
       return;
    }
 
@@ -265,7 +267,7 @@ benchmarkSpMVLegacy( BenchmarkType& benchmark,
    auto spmvCuda = [&]() {
       cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
    };
-   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
    benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
  #endif
 }
@@ -286,6 +288,8 @@ benchmarkSpMV( BenchmarkType& benchmark,
    using HostVector = Containers::Vector< Real, Devices::Host, int >;
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
+   benchmark.setMetadataElement({ "format", MatrixInfo< HostMatrix >::getFormat() });
+
    HostMatrix hostMatrix;
    try
    {
@@ -293,12 +297,12 @@ benchmarkSpMV( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to convert the matrix to the target format:"  << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to convert the matrix to the target format: " + String(e.what()) );
       return;
    }
 
-   const int elements = hostMatrix.getNonzeroElementsCount();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const int nonzeros = hostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) nonzeros * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setDatasetSize( datasetSize );
 
    /////
@@ -317,7 +321,7 @@ benchmarkSpMV( BenchmarkType& benchmark,
          hostMatrix.vectorProduct( hostInVector, hostOutVector );
 
       };
-      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
+      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
       benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
    }
 
@@ -332,7 +336,7 @@ benchmarkSpMV( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to copy the matrix on GPU:" << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to copy the matrix on GPU: " + String(e.what()) );
       return;
    }
 
@@ -346,7 +350,7 @@ benchmarkSpMV( BenchmarkType& benchmark,
    auto spmvCuda = [&]() {
       cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
    };
-   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
    benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
  #endif
 }
@@ -368,6 +372,8 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
    using HostVector = Containers::Vector< Real, Devices::Host, int >;
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
+   benchmark.setMetadataElement({ "format", MatrixInfo< HostMatrix >::getFormat() });
+
    HostMatrix hostMatrix;
    try
    {
@@ -375,12 +381,12 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to convert the matrix to the target format:"  << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to convert the matrix to the target format: " + String(e.what()) );
       return;
    }
 
-   const int elements = hostMatrix.getNonzeroElementsCount();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const int nonzeros = hostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) nonzeros * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setDatasetSize( datasetSize );
 
    /////
@@ -399,7 +405,7 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
          hostMatrix.vectorProduct( hostInVector, hostOutVector );
 
       };
-      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
+      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
       benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
    }
 
@@ -414,7 +420,7 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to copy the matrix on GPU:" << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to copy the matrix on GPU: " + String(e.what()) );
       return;
    }
 
@@ -432,14 +438,18 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
    {
       cudaMatrix.getSegments().getKernel().setThreadsMapping( Algorithms::Segments::CSRLightAutomaticThreads );
       String format = MatrixInfo< HostMatrix >::getFormat() + " Automatic";
-      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( format, csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+      benchmark.setMetadataElement({ "format", format });
+
+      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
       benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
    };
 
    {
       cudaMatrix.getSegments().getKernel().setThreadsMapping( Algorithms::Segments::CSRLightAutomaticThreadsLightSpMV );
       String format = MatrixInfo< HostMatrix >::getFormat() + " Automatic Light";
-      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( format, csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+      benchmark.setMetadataElement({ "format", format });
+
+      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
       benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
    };
 
@@ -447,7 +457,9 @@ benchmarkSpMVCSRLight( BenchmarkType& benchmark,
    {
       cudaMatrix.getSegments().getKernel().setThreadsPerSegment( threadsPerRow );
       String format = MatrixInfo< HostMatrix >::getFormat() + " " + convertToString( threadsPerRow );
-      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( format, csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+      benchmark.setMetadataElement({ "format", format });
+
+      SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
       benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
    }*/
  #endif
@@ -470,6 +482,8 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
    using HostVector = Containers::Vector< Real, Devices::Host, int >;
    using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
 
+   benchmark.setMetadataElement({ "format", MatrixInfo< HostMatrix >::getFormat() });
+
    HostMatrix hostMatrix;
    try
    {
@@ -477,12 +491,12 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to convert the matrix to the target format:" << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to convert the matrix to the target format: " + String(e.what()) );
       return;
    }
 
-   const int elements = hostMatrix.getNonzeroElementsCount();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const int nonzeros = hostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) nonzeros * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setDatasetSize( datasetSize );
 
    /////
@@ -501,7 +515,7 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
          hostMatrix.vectorProduct( hostInVector, hostOutVector );
 
       };
-      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, hostOutVector, hostMatrix.getNonzeroElementsCount() );
+      SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
       benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
    }
 
@@ -516,7 +530,7 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << "Unable to copy the matrix on GPU:" << e.what() << std::endl;
+      benchmark.addErrorMessage( "Unable to copy the matrix on GPU: " + String(e.what()) );
       return;
    }
 
@@ -530,7 +544,7 @@ benchmarkBinarySpMV( BenchmarkType& benchmark,
    auto spmvCuda = [&]() {
       cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
    };
-   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( MatrixInfo< HostMatrix >::getFormat(), csrResultVector, cudaOutVector, cudaMatrix.getNonzeroElementsCount() );
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
    benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
  #endif
 }
@@ -645,7 +659,7 @@ dispatchSymmetric( BenchmarkType& benchmark,
    }
    catch(const std::exception& e)
    {
-      std::cerr << e.what() << " ... SKIPPING " << std::endl;
+      benchmark.addErrorMessage( "Unable to read the symmetric matrix: " + String(e.what()) );
       return;
    }
    InputMatrix hostMatrix;
@@ -706,20 +720,20 @@ benchmarkSpmv( BenchmarkType& benchmark,
    // Set-up benchmark datasize
    //
    MatrixReader< CSRHostMatrix >::readMtx( inputFileName, csrHostMatrix, verboseMR );
-   const int elements = csrHostMatrix.getNonzeroElementsCount();
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const int nonzeros = csrHostMatrix.getNonzeroElementsCount();
+   const double datasetSize = (double) nonzeros * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    benchmark.setDatasetSize( datasetSize );
 
    ////
    // Perform benchmark on host with CSR as a reference CPU format
    //
-   auto nonzeros = csrHostMatrix.getNonzeroElementsCount();
    benchmark.setMetadataColumns({
       { "matrix name", convertToString( inputFileName ) },
       { "rows", convertToString( csrHostMatrix.getRows() ) },
       { "columns", convertToString( csrHostMatrix.getColumns() ) },
       { "nonzeros", convertToString( nonzeros ) },
-      { "nonzeros per row", convertToString( ( double ) nonzeros / ( double ) csrHostMatrix.getRows() ) },
+      // NOTE: this can be easily calculated with Pandas based on the other metadata
+      //{ "nonzeros per row", convertToString( ( double ) nonzeros / ( double ) csrHostMatrix.getRows() ) },
    });
 
    HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
@@ -733,8 +747,9 @@ benchmarkSpmv( BenchmarkType& benchmark,
        csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
    };
 
-   SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( String( "CSR" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   benchmark.time< Devices::Host >( resetHostVectors, "", spmvCSRHost, csrBenchmarkResults );
+   SpmvBenchmarkResult< Real, Devices::Host, int > csrBenchmarkResults( hostOutVector, hostOutVector );
+   benchmark.setMetadataElement({ "format", "CSR" });
+   benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvCSRHost, csrBenchmarkResults );
 
 #ifdef HAVE_PETSC
    Mat petscMatrix;
@@ -761,8 +776,9 @@ benchmarkSpmv( BenchmarkType& benchmark,
       MatMult( petscMatrix, inVector, outVector );
    };
 
-   SpmvBenchmarkResult< Real, Devices::Host, int > petscBenchmarkResults( String( "Petsc" ), hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
-   benchmark.time< Devices::Host >( resetPetscVectors, "", petscSpmvCSRHost, petscBenchmarkResults );
+   SpmvBenchmarkResult< Real, Devices::Host, int > petscBenchmarkResults( hostOutVector, hostOutVector );
+   benchmark.setMetadataElement({ "format", "Petsc" });
+   benchmark.time< Devices::Host >( resetPetscVectors, "CPU", petscSpmvCSRHost, petscBenchmarkResults );
 #endif
 
 
@@ -790,15 +806,14 @@ benchmarkSpmv( BenchmarkType& benchmark,
        cusparseMatrix.vectorProduct( cudaInVector, cudaOutVector );
    };
 
-   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( String( "cusparse" ), hostOutVector, cudaOutVector, csrHostMatrix.getNonzeroElementsCount() );
+   SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( hostOutVector, cudaOutVector );
+   benchmark.setMetadataElement({ "format", "cusparse" });
    benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cudaBenchmarkResults );
 
 #ifdef HAVE_CSR5
    ////
    // Perform benchmark on CUDA device with CSR5 as a reference GPU format
    //
-   cudaBenchmarkResults.setFormat( String( "CSR5" ) );
-
    CudaVector cudaOutVector2( cudaOutVector );
    CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector );
 
@@ -806,6 +821,7 @@ benchmarkSpmv( BenchmarkType& benchmark,
        csr5Benchmark.vectorProduct();
    };
 
+   benchmark.setMetadataElement({ "format", "CSR5" });
    benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cudaBenchmarkResults );
    std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
    csrCudaMatrix.reset();
@@ -814,8 +830,6 @@ benchmarkSpmv( BenchmarkType& benchmark,
    ////
    // Perform benchmark on CUDA device with LightSpMV as a reference GPU format
    //
-   cudaBenchmarkResults.setFormat( String( "LightSpMV Vector" ) );
-
    LightSpMVCSRHostMatrix lightSpMVCSRHostMatrix;
    lightSpMVCSRHostMatrix = csrHostMatrix;
    LightSpMVBenchmark< Real > lightSpMVBenchmark( lightSpMVCSRHostMatrix, LightSpMVBenchmarkKernelVector );
@@ -826,10 +840,11 @@ benchmarkSpmv( BenchmarkType& benchmark,
    auto spmvLightSpMV = [&]() {
        lightSpMVBenchmark.vectorProduct();
    };
+   benchmark.setMetadataElement({ "format", "LightSpMV Vector" });
    benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
 
-   cudaBenchmarkResults.setFormat( String( "LightSpMV Warp" ) );
    lightSpMVBenchmark.setKernelType( LightSpMVBenchmarkKernelWarp );
+   benchmark.setMetadataElement({ "format", "LightSpMV Warp" });
    benchmark.time< Devices::Cuda >( resetLightSpMVVectors, "GPU", spmvLightSpMV, cudaBenchmarkResults );
 #endif
    csrHostMatrix.reset();
-- 
GitLab


From 8f5803f1e731c31dea3ea19f3ba64f9674de0aa9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 9 Nov 2021 14:06:28 +0100
Subject: [PATCH 27/40] Benchmarks: all results are written in scientific
 format to avoid precision loss

---
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h | 5 +++--
 src/TNL/Benchmarks/Benchmarks.h           | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 0ea788348..755489e07 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -49,7 +49,7 @@ struct SpmvBenchmarkResult
 
    virtual std::vector< int > getColumnWidthHints() const override
    {
-      return std::vector< int >({ 12, 12, 14, 12, 12, 14, 14 });
+      return std::vector< int >({ 14, 14, 14, 14, 14, 14, 14 });
    }
 
    virtual RowElements getRowElements() const override
@@ -58,7 +58,8 @@ struct SpmvBenchmarkResult
       benchmarkResultCopy = benchmarkResult;
       auto diff = csrResult - benchmarkResultCopy;
       RowElements elements;
-      elements << time << stddev << stddev/time << bandwidth;
+      // write in scientific format to avoid precision loss
+      elements << std::scientific << time << stddev << stddev/time << bandwidth;
       if( speedup != 0.0 )
          elements << speedup;
       else
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index c258143c2..27dc077d1 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -43,13 +43,14 @@ struct BenchmarkResult
 
    virtual std::vector< int > getColumnWidthHints() const
    {
-      return std::vector< int >({ 8, 8, 8, 8, 8 });
+      return std::vector< int >({ 14, 14, 14, 14, 14 });
    }
 
    virtual RowElements getRowElements() const
    {
       RowElements elements;
-      elements << time << stddev << stddev / time << bandwidth;
+      // write in scientific format to avoid precision loss
+      elements << std::scientific << time << stddev << stddev / time << bandwidth;
       if( speedup != 0 )
          elements << speedup;
       else
-- 
GitLab


From 6b92af5ec22552cfdc4fb7210c757e6a4383bf19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Tue, 9 Nov 2021 14:45:56 +0100
Subject: [PATCH 28/40] Benchmarks: added configuration of widths for metadata
 columns

---
 src/Benchmarks/SpMV/spmv.h         |  6 +++++-
 src/TNL/Benchmarks/Benchmarks.h    |  3 +++
 src/TNL/Benchmarks/Benchmarks.hpp  |  8 ++++++++
 src/TNL/Benchmarks/CustomLogging.h | 17 +++++++++++++++--
 src/TNL/Benchmarks/JsonLogging.h   | 23 +++++++++++++++++++----
 src/TNL/Benchmarks/Logging.h       |  2 ++
 6 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 794f4d9c7..0e57e68c1 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -728,13 +728,17 @@ benchmarkSpmv( BenchmarkType& benchmark,
    // Perform benchmark on host with CSR as a reference CPU format
    //
    benchmark.setMetadataColumns({
-      { "matrix name", convertToString( inputFileName ) },
+      { "matrix name", inputFileName },
       { "rows", convertToString( csrHostMatrix.getRows() ) },
       { "columns", convertToString( csrHostMatrix.getColumns() ) },
       { "nonzeros", convertToString( nonzeros ) },
       // NOTE: this can be easily calculated with Pandas based on the other metadata
       //{ "nonzeros per row", convertToString( ( double ) nonzeros / ( double ) csrHostMatrix.getRows() ) },
    });
+   benchmark.setMetadataWidths({
+      { "matrix name", 32 },
+      { "format", 35 },
+   });
 
    HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
 
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index 27dc077d1..10f036f37 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -95,6 +95,9 @@ class Benchmark
       // changing MetadataColumns that were set using the previous method.
       void setMetadataElement( const typename MetadataColumns::value_type & element );
 
+      // Sets the width of metadata columns when printed to the terminal.
+      void setMetadataWidths( const std::map< std::string, int > & widths );
+
       // Sets the dataset size and base time for the calculations of bandwidth
       // and speedup in the benchmarks result.
       void setDatasetSize( const double datasetSize = 0.0, // in GB
diff --git a/src/TNL/Benchmarks/Benchmarks.hpp b/src/TNL/Benchmarks/Benchmarks.hpp
index 3205ebb5d..a2519c1b5 100644
--- a/src/TNL/Benchmarks/Benchmarks.hpp
+++ b/src/TNL/Benchmarks/Benchmarks.hpp
@@ -108,6 +108,14 @@ setMetadataElement( const typename MetadataColumns::value_type & element )
    logger.setMetadataElement( element );
 }
 
+template< typename Logger >
+void
+Benchmark< Logger >::
+setMetadataWidths( const std::map< std::string, int > & widths )
+{
+   logger.setMetadataWidths( widths );  // the widths are kept by the logger, which prints the metadata columns
+}
+
 template< typename Logger >
 void
 Benchmark< Logger >::
diff --git a/src/TNL/Benchmarks/CustomLogging.h b/src/TNL/Benchmarks/CustomLogging.h
index 7f5dafeb9..c09a53268 100644
--- a/src/TNL/Benchmarks/CustomLogging.h
+++ b/src/TNL/Benchmarks/CustomLogging.h
@@ -84,13 +84,24 @@ public:
       }
    }
 
+   // Records the terminal widths requested for the given metadata columns;
+   // a width already stored under the same column name is overwritten.
+   virtual void
+   setMetadataWidths( const std::map< std::string, int > & widths ) override
+   {
+      // operator[] inserts a missing key and overwrites an existing one
+      for( const auto & entry : widths )
+         metadataWidths[ entry.first ] = entry.second;
+   }
+
    void
    writeTableHeader( const std::string & spanningElement,
                      const HeaderElements & subElements )
    {
       if( verbose && header_changed ) {
          for( auto & it : metadataColumns ) {
-            std::cout << std::setw( 20 ) << it.first;
+            const int width = (metadataWidths.count( it.first )) ? metadataWidths[ it.first ] : 15;
+            std::cout << std::setw( width ) << it.first;
          }
 
          // spanning element is printed as usual column to stdout,
@@ -124,7 +135,8 @@ public:
    {
       if( verbose ) {
          for( auto & it : metadataColumns ) {
-            std::cout << std::setw( 20 ) << it.second;
+            const int width = (metadataWidths.count( it.first )) ? metadataWidths[ it.first ] : 15;
+            std::cout << std::setw( width ) << it.second;
          }
          // spanning element is printed as usual column to stdout
          std::cout << std::setw( 15 ) << spanningElement;
@@ -220,6 +232,7 @@ protected:
    std::stringstream log;
 
    MetadataColumns metadataColumns;
+   std::map< std::string, int > metadataWidths;
    bool header_changed = true;
 };
 
diff --git a/src/TNL/Benchmarks/JsonLogging.h b/src/TNL/Benchmarks/JsonLogging.h
index 5ed52a13e..6ff7236a5 100644
--- a/src/TNL/Benchmarks/JsonLogging.h
+++ b/src/TNL/Benchmarks/JsonLogging.h
@@ -79,13 +79,25 @@ public:
       }
    }
 
+   // Merges the requested column widths into the stored ones: new column
+   // names are added, names that are already present get the new width.
+   virtual void
+   setMetadataWidths( const std::map< std::string, int > & widths ) override
+   {
+      // a single operator[] assignment covers both the insert and update case
+      for( const auto & item : widths )
+         metadataWidths[ item.first ] = item.second;
+   }
+
    void writeHeader( const HeaderElements& headerElements, const WidthHints& widths )
    {
       TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
       if( verbose && header_changed )
       {
-         for( auto & lg : metadataColumns )
-            std::cout << std::setw( 20 ) << lg.first;
+         for( auto & lg : metadataColumns ) {
+            const int width = (metadataWidths.count( lg.first )) ? metadataWidths[ lg.first ] : 14;
+            std::cout << std::setw( width ) << lg.first;
+         }
          for( std::size_t i = 0; i < headerElements.size(); i++ )
             std::cout << std::setw( widths[ i ] ) << headerElements[ i ];
          std::cout << std::endl;
@@ -107,8 +119,10 @@ public:
       int idx( 0 );
       for( auto lg : this->metadataColumns )
       {
-         if( verbose )
-            std::cout << std::setw( 20 ) << lg.second;
+         if( verbose ) {
+            const int width = (metadataWidths.count( lg.first )) ? metadataWidths[ lg.first ] : 14;
+            std::cout << std::setw( width ) << lg.second;
+         }
          if( idx++ > 0 )
             log << ", ";
          log << "\"" << lg.first << "\": \"" << lg.second << "\"";
@@ -200,6 +214,7 @@ protected:
    std::stringstream log;
 
    MetadataColumns metadataColumns;
+   std::map< std::string, int > metadataWidths;
    bool header_changed = true;
 };
 
diff --git a/src/TNL/Benchmarks/Logging.h b/src/TNL/Benchmarks/Logging.h
index 2e27195e7..7c851ad05 100644
--- a/src/TNL/Benchmarks/Logging.h
+++ b/src/TNL/Benchmarks/Logging.h
@@ -110,6 +110,8 @@ public:
    virtual void setMetadataElement( const typename MetadataColumns::value_type & element,
                                     int insertPosition = -1 /* negative values insert from the end */ ) = 0;
 
+   virtual void setMetadataWidths( const std::map< std::string, int > & widths ) = 0;
+
    virtual void
    logResult( const std::string& performer,
               const HeaderElements& headerElements,
-- 
GitLab


From 4f560a30ffa0c28dd168c55d98922c598fe07d4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 14 Nov 2021 21:51:26 +0100
Subject: [PATCH 29/40] BLAS benchmark: renamed dense-mv.h to gemv.h and added
 benchmarks of rectangular matrices

---
 src/Benchmarks/BLAS/{dense-mv.h => gemv.h} | 57 +++++++---------------
 src/Benchmarks/BLAS/tnl-benchmark-blas.h   | 19 +++++---
 2 files changed, 28 insertions(+), 48 deletions(-)
 rename src/Benchmarks/BLAS/{dense-mv.h => gemv.h} (60%)

diff --git a/src/Benchmarks/BLAS/dense-mv.h b/src/Benchmarks/BLAS/gemv.h
similarity index 60%
rename from src/Benchmarks/BLAS/dense-mv.h
rename to src/Benchmarks/BLAS/gemv.h
index 7b1bb1c6d..93147d6ba 100644
--- a/src/Benchmarks/BLAS/dense-mv.h
+++ b/src/Benchmarks/BLAS/gemv.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          dense-mv.h  -  description
+                          gemv.h  -  description
                              -------------------
     begin                : Jul 8, 2021
     copyright            : (C) 2021 by Tomas Oberhuber et al.
@@ -8,7 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-// Implemented by: Jakub Klinkovsky
+// Implemented by: Jakub Klinkovsky, Tomas Oberhuber
 
 #pragma once
 
@@ -16,7 +16,6 @@
 #include "cublasWrappers.h"
 
 #include <TNL/Containers/Vector.h>
-#include <TNL/Pointers/DevicePointer.h>
 #include <TNL/Matrices/DenseMatrix.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Devices/Host.h>
@@ -27,16 +26,12 @@ namespace Benchmarks {
 template< typename Matrix >
 void setMatrix( Matrix& matrix )
 {
-   using RealType = typename Matrix::RealType;
-   using IndexType = typename Matrix::IndexType;
-   matrix.forAllElements( [] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value ) {
-       value = 1.0; } );
+   matrix.setValue( 1.0 );
 }
 
 template< typename Real >
 void
-benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
-                           const int & size )
+benchmarkGemv( Benchmark<> & benchmark, int rows, int columns )
 {
    using HostMatrix = TNL::Matrices::DenseMatrix< Real, TNL::Devices::Host >;
    using RowMajorCudaMatrix = TNL::Matrices::DenseMatrix< Real, TNL::Devices::Cuda, int, TNL::Algorithms::Segments::RowMajorOrder >;
@@ -50,16 +45,13 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    HostVector inHostVector, outHostVector;
    CudaVector inCudaVector, outCudaVector1, outCudaVector2;
 
-   // set metadata
-   const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() );
-   benchmark.setMetadataElement({ "format", parsedType[ 0 ] });
-
-   hostMatrix.setDimensions( size, size );
-   inHostVector.setSize( size );
-   outHostVector.setSize( size );
+   hostMatrix.setDimensions( rows, columns );
+   inHostVector.setSize( columns );
+   outHostVector.setSize( rows );
 
    setMatrix< HostMatrix >( hostMatrix );
-   const double datasetSize = (double) ( size * size ) * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
+   const double datasetSize = (double) ( rows * columns + rows + columns ) * sizeof(Real) / oneGB;
+   benchmark.setOperation( "gemv", datasetSize );
 
    // reset function
    auto reset = [&]() {
@@ -76,14 +68,13 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    auto spmvHost = [&]() {
       hostMatrix.vectorProduct( inHostVector, outHostVector );
    };
-   benchmark.setDatasetSize( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
 
 #ifdef HAVE_CUDA
-   columnMajorCudaMatrix.setDimensions( size, size );
-   inCudaVector.setSize( size );
-   outCudaVector1.setSize( size );
-   outCudaVector2.setSize( size );
+   columnMajorCudaMatrix.setDimensions( rows, columns );
+   inCudaVector.setSize( columns );
+   outCudaVector1.setSize( rows );
+   outCudaVector2.setSize( rows );
    setMatrix< ColumnMajorCudaMatrix >( columnMajorCudaMatrix );
 
    auto columnMajorMvCuda = [&]() {
@@ -93,7 +84,7 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
 
    columnMajorCudaMatrix.reset();
 
-   rowMajorCudaMatrix.setDimensions( size, size );
+   rowMajorCudaMatrix.setDimensions( rows, columns );
    setMatrix< RowMajorCudaMatrix >( rowMajorCudaMatrix );
 
    auto rowMajorMvCuda = [&]() {
@@ -105,7 +96,7 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    //std::cerr << outCudaVector1 << std::endl << outCudaVector2 << std::endl;
 
    rowMajorCudaMatrix.reset();
-   columnMajorCudaMatrix.setDimensions( size, size );
+   columnMajorCudaMatrix.setDimensions( rows, columns );
    setMatrix< ColumnMajorCudaMatrix >( columnMajorCudaMatrix );
 
    cublasHandle_t cublasHandle;
@@ -113,8 +104,8 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
    auto mvCublas = [&] () {
       Real alpha = 1.0;
       Real beta = 0.0;
-      cublasGemv( cublasHandle, CUBLAS_OP_N, size, size, &alpha,
-                  columnMajorCudaMatrix.getValues().getData(), size,
+      cublasGemv( cublasHandle, CUBLAS_OP_N, rows, columns, &alpha,
+                  columnMajorCudaMatrix.getValues().getData(), rows,
                   inCudaVector.getData(), 1, &beta,
                   outCudaVector1.getData(), 1 );
    };
@@ -124,19 +115,5 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
 #endif
 }
 
-/*template< typename Real = double,
-          typename Index = int >
-void
-benchmarkDenseSynthetic( Benchmark<> & benchmark,
-                         const int & size )
-{
-   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
-   // NOTE: CSR is disabled because it is very slow on GPU
-   //benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, Benchmarks::SpMV::ReferenceFormats::Legacy::Ellpack >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, Benchmarks::SpMV::ReferenceFormats::Legacy::ChunkedEllpack >( benchmark, size, elementsPerRow );
-}*/
-
 } // namespace Benchmarks
 } // namespace TNL
diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index bf2708112..606ecae7d 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -22,7 +22,7 @@
 #include "vector-operations.h"
 #include "triad.h"
 #include "spmv.h"
-#include "dense-mv.h"
+#include "gemv.h"
 
 
 using namespace TNL;
@@ -106,14 +106,17 @@ runBlasBenchmarks( Benchmark<> & benchmark,
    // Dense matrix-vector multiplication
    benchmark.newBenchmark( String("Dense matrix-vector multiplication (") + precision + ")",
                            metadata );
-   for( std::size_t size = 10; size <= 20000; size *= 2 ) {
-      benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
-         { "rows", convertToString( size ) },
-         { "columns", convertToString( size ) }
-      } ));
-      benchmarkDenseMVSynthetic< Real >( benchmark, size );
+   for( std::size_t rows = 10; rows <= 20000 * 20000; rows *= 2 ) {
+      for( std::size_t columns = 10; columns <= 20000 * 20000; columns *= 2 ) {
+         if( rows * columns > 20000 * 20000 )
+            break;
+         benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+            { "rows", convertToString( rows ) },
+            { "columns", convertToString( columns ) }
+         } ));
+         benchmarkGemv< Real >( benchmark, rows, columns );
+      }
    }
-
 }
 
 void
-- 
GitLab


From 8d7f333064263f92ee5aeb2b4ed91e29c5b8de0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 14 Nov 2021 21:54:24 +0100
Subject: [PATCH 30/40] BLAS benchmark: removed SpMV benchmark, it is now a
 separate benchmark binary

---
 src/Benchmarks/BLAS/spmv.h               | 185 -----------------------
 src/Benchmarks/BLAS/tnl-benchmark-blas.h |  22 +--
 2 files changed, 3 insertions(+), 204 deletions(-)
 delete mode 100644 src/Benchmarks/BLAS/spmv.h

diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h
deleted file mode 100644
index 6f70e34d9..000000000
--- a/src/Benchmarks/BLAS/spmv.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/***************************************************************************
-                          spmv.h  -  description
-                             -------------------
-    begin                : Dec 30, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber et al.
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-// Implemented by: Jakub Klinkovsky
-
-#pragma once
-
-#include <TNL/Benchmarks/Benchmarks.h>
-
-#include <TNL/Pointers/DevicePointer.h>
-#include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h>
-#include <Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack.h>
-#include <Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack.h>
-#include <Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack.h>
-
-namespace TNL {
-namespace Benchmarks {
-
-// silly alias to match the number of template parameters with other formats
-template< typename Real, typename Device, typename Index >
-using SlicedEllpack = SpMV::ReferenceFormats::Legacy::SlicedEllpack< Real, Device, Index >;
-
-// Legacy formats
-template< typename Real, typename Device, typename Index >
-using SparseMatrixLegacy_CSR_Scalar = SpMV::ReferenceFormats::Legacy::CSR< Real, Device, Index, SpMV::ReferenceFormats::Legacy::CSRScalar >;
-
-
-template< typename Matrix >
-int setHostTestMatrix( Matrix& matrix,
-                       const int elementsPerRow )
-{
-   const int size = matrix.getRows();
-   int elements( 0 );
-   for( int row = 0; row < size; row++ ) {
-      int col = row - elementsPerRow / 2;
-      for( int element = 0; element < elementsPerRow; element++ ) {
-         if( col + element >= 0 &&
-            col + element < size )
-         {
-            matrix.setElement( row, col + element, element + 1 );
-            elements++;
-         }
-      }
-   }
-   return elements;
-}
-
-#ifdef HAVE_CUDA
-template< typename Matrix >
-__global__ void setCudaTestMatrixKernel( Matrix* matrix,
-                                         const int elementsPerRow,
-                                         const int gridIdx )
-{
-   const int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx >= matrix->getRows() )
-      return;
-   int col = rowIdx - elementsPerRow / 2;
-   for( int element = 0; element < elementsPerRow; element++ ) {
-      if( col + element >= 0 &&
-         col + element < matrix->getColumns() )
-         matrix->setElementFast( rowIdx, col + element, element + 1 );
-   }
-}
-#endif
-
-template< typename Matrix >
-void setCudaTestMatrix( Matrix& matrix,
-                        const int elementsPerRow )
-{
-#ifdef HAVE_CUDA
-   typedef typename Matrix::IndexType IndexType;
-   typedef typename Matrix::RealType RealType;
-   Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
-      if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-      setCudaTestMatrixKernel< Matrix >
-         <<< cudaGridSize, cudaBlockSize >>>
-         ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
-        TNL_CHECK_CUDA_DEVICE;
-   }
-#endif
-}
-
-
-// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
-template< typename Real,
-          template< typename, typename, typename > class Matrix >
-void
-benchmarkSpMV( Benchmark<> & benchmark,
-               const int & size,
-               const int elementsPerRow = 5 )
-{
-   typedef Matrix< Real, Devices::Host, int > HostMatrix;
-   typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
-   typedef Containers::Vector< Real, Devices::Host, int > HostVector;
-   typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
-
-   HostMatrix hostMatrix;
-   DeviceMatrix deviceMatrix;
-   Containers::Vector< int, Devices::Host, int > hostRowLengths;
-   Containers::Vector< int, Devices::Cuda, int > deviceRowLengths;
-   HostVector hostVector, hostVector2;
-   CudaVector deviceVector, deviceVector2;
-
-   // set metadata
-   const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() );
-   benchmark.setMetadataElement({ "format", parsedType[ 0 ] });
-
-   hostRowLengths.setSize( size );
-   hostMatrix.setDimensions( size, size );
-   hostVector.setSize( size );
-   hostVector2.setSize( size );
-#ifdef HAVE_CUDA
-   deviceRowLengths.setSize( size );
-   deviceMatrix.setDimensions( size, size );
-   deviceVector.setSize( size );
-   deviceVector2.setSize( size );
-#endif
-
-   hostRowLengths.setValue( elementsPerRow );
-#ifdef HAVE_CUDA
-   deviceRowLengths.setValue( elementsPerRow );
-#endif
-
-   hostMatrix.setCompressedRowLengths( hostRowLengths );
-#ifdef HAVE_CUDA
-   deviceMatrix.setCompressedRowLengths( deviceRowLengths );
-#endif
-
-   const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
-   setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
-   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
-
-   // reset function
-   auto reset = [&]() {
-      hostVector.setValue( 1.0 );
-      hostVector2.setValue( 0.0 );
-#ifdef HAVE_CUDA
-      deviceVector.setValue( 1.0 );
-      deviceVector2.setValue( 0.0 );
-#endif
-   };
-
-   // compute functions
-   auto spmvHost = [&]() {
-      hostMatrix.vectorProduct( hostVector, hostVector2 );
-   };
-   benchmark.setDatasetSize( datasetSize );
-   benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
-#ifdef HAVE_CUDA
-   auto spmvCuda = [&]() {
-      deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
-   };
-   benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
-#endif
-}
-
-template< typename Real = double,
-          typename Index = int >
-void
-benchmarkSpmvSynthetic( Benchmark<> & benchmark,
-                        const int & size,
-                        const int & elementsPerRow )
-{
-   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
-   // NOTE: CSR is disabled because it is very slow on GPU
-   //benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, Benchmarks::SpMV::ReferenceFormats::Legacy::Ellpack >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
-   benchmarkSpMV< Real, Benchmarks::SpMV::ReferenceFormats::Legacy::ChunkedEllpack >( benchmark, size, elementsPerRow );
-}
-
-} // namespace Benchmarks
-} // namespace TNL
diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index 606ecae7d..f56c262c0 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -21,7 +21,6 @@
 #include "array-operations.h"
 #include "vector-operations.h"
 #include "triad.h"
-#include "spmv.h"
 #include "gemv.h"
 
 
@@ -35,8 +34,7 @@ runBlasBenchmarks( Benchmark<> & benchmark,
                    Benchmark<>::MetadataMap metadata,
                    const std::size_t & minSize,
                    const std::size_t & maxSize,
-                   const double & sizeStepFactor,
-                   const int & elementsPerRow )
+                   const double & sizeStepFactor )
 {
    const String precision = getType< Real >();
    metadata["precision"] = precision;
@@ -91,18 +89,6 @@ runBlasBenchmarks( Benchmark<> & benchmark,
    }
 #endif
 
-   // Sparse matrix-vector multiplication
-   benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
-                           metadata );
-   for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
-      benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
-         { "rows", convertToString( size ) },
-         { "columns", convertToString( size ) },
-         { "elements per row", convertToString( elementsPerRow ) },
-      } ));
-      benchmarkSpmvSynthetic< Real >( benchmark, size, elementsPerRow );
-   }
-
    // Dense matrix-vector multiplication
    benchmark.newBenchmark( String("Dense matrix-vector multiplication (") + precision + ")",
                            metadata );
@@ -135,7 +121,6 @@ setupConfig( Config::ConfigDescription & config )
    config.addEntry< int >( "max-size", "Minimum size of arrays/vectors used in the benchmark.", 10000000 );
    config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
    config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 );
-   config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 );
    config.addEntry< int >( "verbose", "Verbose mode.", 1 );
 
    config.addDelimiter( "Device settings:" );
@@ -170,7 +155,6 @@ main( int argc, char* argv[] )
    const std::size_t maxSize = parameters.getParameter< int >( "max-size" );
    const int sizeStepFactor = parameters.getParameter< int >( "size-step-factor" );
    const int loops = parameters.getParameter< int >( "loops" );
-   const int elementsPerRow = parameters.getParameter< int >( "elements-per-row" );
    const int verbose = parameters.getParameter< int >( "verbose" );
 
    if( sizeStepFactor <= 1 ) {
@@ -191,9 +175,9 @@ main( int argc, char* argv[] )
    Logging::MetadataMap metadata = getHardwareMetadata();
 
    if( precision == "all" || precision == "float" )
-      runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow );
+      runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
    if( precision == "all" || precision == "double" )
-      runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow );
+      runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
 
    if( ! benchmark.save( logFile ) ) {
       std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
-- 
GitLab


From 2b376ef441e47f3a92bb55cf89e59d37bed4d50e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 14 Nov 2021 22:43:16 +0100
Subject: [PATCH 31/40] Benchmarks: refactored logging to write directly into a
 file instead of an intermediate stringstream

---
 src/Benchmarks/BLAS/tnl-benchmark-blas.h      |  9 ++-----
 .../DistSpMV/tnl-benchmark-distributed-spmv.h | 14 +++--------
 .../tnl-benchmark-linear-solvers.h            | 14 +++--------
 .../NDArray/tnl-benchmark-ndarray-boundary.h  |  9 ++-----
 .../NDArray/tnl-benchmark-ndarray.h           |  9 ++-----
 .../ODESolvers/tnl-benchmark-ode-solvers.h    | 14 +++--------
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h      |  9 ++-----
 .../Traversers/tnl-benchmark-traversers.h     | 21 ++++++----------
 src/TNL/Benchmarks/Benchmarks.h               |  4 +--
 src/TNL/Benchmarks/Benchmarks.hpp             | 12 ++-------
 src/TNL/Benchmarks/CustomLogging.h            | 13 ----------
 src/TNL/Benchmarks/JsonLogging.h              | 12 ---------
 src/TNL/Benchmarks/Logging.h                  | 25 ++++++++++++++-----
 13 files changed, 46 insertions(+), 119 deletions(-)

diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index f56c262c0..169645fbe 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -166,10 +166,10 @@ main( int argc, char* argv[] )
    auto mode = std::ios::out;
    if( outputMode == "append" )
        mode |= std::ios::app;
-   std::ofstream logFile( logFileName.getString(), mode );
+   std::ofstream logFile( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -179,10 +179,5 @@ main( int argc, char* argv[] )
    if( precision == "all" || precision == "double" )
       runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
 
-   if( ! benchmark.save( logFile ) ) {
-      std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
-      return EXIT_FAILURE;
-   }
-
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index 260615b5b..404214409 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -331,10 +331,10 @@ main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile;
    if( rank == 0 )
-      logFile.open( logFileName.getString(), mode );
+      logFile.open( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -349,13 +349,5 @@ main( int argc, char* argv[] )
                                                    TNL::Matrices::GeneralMatrix,
                                                    SegmentsType
                                                  >;
-   const bool status = SpmvBenchmark< MatrixType >::run( benchmark, metadata, parameters );
-
-   if( rank == 0 )
-      if( ! benchmark.save( logFile ) ) {
-         std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-         return EXIT_FAILURE;
-      }
-
-   return ! status;
+   return ! SpmvBenchmark< MatrixType >::run( benchmark, metadata, parameters );
 }
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index b4e3c2f64..ad2fb38f3 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -611,10 +611,10 @@ main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile;
    if( rank == 0 )
-      logFile.open( logFileName.getString(), mode );
+      logFile.open( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -629,13 +629,5 @@ main( int argc, char* argv[] )
                                                    TNL::Matrices::GeneralMatrix,
                                                    SegmentsType
                                                  >;
-   const bool status = LinearSolversBenchmark< MatrixType >::run( benchmark, metadata, parameters );
-
-   if( rank == 0 )
-      if( ! benchmark.save( logFile ) ) {
-         std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-         return EXIT_FAILURE;
-      }
-
-   return ! status;
+   return ! LinearSolversBenchmark< MatrixType >::run( benchmark, metadata, parameters );
 }
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
index 70fe0420f..00aff7527 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
@@ -440,10 +440,10 @@ int main( int argc, char* argv[] )
    auto mode = std::ios::out;
    if( outputMode == "append" )
        mode |= std::ios::app;
-   std::ofstream logFile( logFileName.getString(), mode );
+   std::ofstream logFile( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -456,10 +456,5 @@ int main( int argc, char* argv[] )
       run_benchmarks< Devices::Cuda >( benchmark );
 #endif
 
-   if( ! benchmark.save( logFile ) ) {
-      std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-      return EXIT_FAILURE;
-   }
-
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
index de3ccdbb5..5e31de127 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
@@ -428,10 +428,10 @@ int main( int argc, char* argv[] )
    auto mode = std::ios::out;
    if( outputMode == "append" )
        mode |= std::ios::app;
-   std::ofstream logFile( logFileName.getString(), mode );
+   std::ofstream logFile( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -444,10 +444,5 @@ int main( int argc, char* argv[] )
       run_benchmarks< Devices::Cuda >( benchmark );
 #endif
 
-   if( ! benchmark.save( logFile ) ) {
-      std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-      return EXIT_FAILURE;
-   }
-
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index acfba1ef0..18f8d0ce7 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -242,21 +242,13 @@ main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile;
    if( rank == 0 )
-      logFile.open( logFileName.getString(), mode );
+      logFile.open( logFileName, mode );
 
    // init benchmark and common metadata
-   Benchmark<> benchmark( loops, verbose );
+   Benchmark<> benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
 
-   const bool status = resolveRealTypes( benchmark, metadata, parameters );
-
-   if( rank == 0 )
-      if( ! benchmark.save( logFile ) ) {
-         std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-         return EXIT_FAILURE;
-      }
-
-   return ! status;
+   return ! resolveRealTypes( benchmark, metadata, parameters );
 }
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 639ff7390..839c98f5d 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -150,10 +150,10 @@ main( int argc, char* argv[] )
    auto mode = std::ios::out;
    if( outputMode == "append" )
        mode |= std::ios::app;
-   std::ofstream logFile( logFileName.getString(), mode );
+   std::ofstream logFile( logFileName, mode );
 
    // init benchmark and common metadata
-   TNL::Benchmarks::SpMV::BenchmarkType benchmark( loops, verbose );
+   TNL::Benchmarks::SpMV::BenchmarkType benchmark( logFile, loops, verbose );
 
    // prepare global metadata
    Logging::MetadataMap metadata = getHardwareMetadata();
@@ -164,11 +164,6 @@ main( int argc, char* argv[] )
    if( precision == "all" || precision == "double" )
       runSpMVBenchmarks< double >( benchmark, metadata, inputFileName, parameters, verboseMR );
 
-   if( ! benchmark.save( logFile ) ) {
-      std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-      return EXIT_FAILURE;
-   }
-
    // Confirm that the benchmark has finished
    std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
    return EXIT_SUCCESS;
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 6ea3ef517..8516107b1 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -461,21 +461,16 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
 //   const String & precision = parameters.getParameter< String >( "precision" );
 //   const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
 
-   Benchmark<> benchmark; //( loops, verbose );
-   benchmark.setup( parameters );
-   Logging::MetadataMap metadata = getHardwareMetadata();
-   runBenchmark< Dimension >( parameters, benchmark, metadata );
-
    auto mode = std::ios::out;
    if( outputMode == "append" )
        mode |= std::ios::app;
-   std::ofstream logFile( logFileName.getString(), mode );
+   std::ofstream logFile( logFileName, mode );
+
+   Benchmark<> benchmark( logFile ); //( loops, verbose );
+   benchmark.setup( parameters );
+   Logging::MetadataMap metadata = getHardwareMetadata();
+   runBenchmark< Dimension >( parameters, benchmark, metadata );
 
-   if( ! benchmark.save( logFile ) )
-   {
-      std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
-      return false;
-   }
    return true;
 }
 
@@ -515,7 +510,5 @@ int main( int argc, char* argv[] )
             break;
       }
    }
-   if( status == false )
-      return EXIT_FAILURE;
-   return EXIT_SUCCESS;
+   return ! status;
 }
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index 10f036f37..164849373 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -68,7 +68,7 @@ class Benchmark
       using MetadataColumns = typename Logger::MetadataColumns;
       using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;
 
-      Benchmark( int loops = 10, bool verbose = true );
+      Benchmark( std::ostream& output, int loops = 10, bool verbose = true );
 
       static void configSetup( Config::ConfigDescription& config );
 
@@ -147,8 +147,6 @@ class Benchmark
       // "time" method could not be called (e.g. due to failed allocation).
       void addErrorMessage( const std::string& message );
 
-      bool save( std::ostream& logFile );
-
       SolverMonitorType& getMonitor();
 
       double getBaseTime() const;
diff --git a/src/TNL/Benchmarks/Benchmarks.hpp b/src/TNL/Benchmarks/Benchmarks.hpp
index a2519c1b5..a8ac4888f 100644
--- a/src/TNL/Benchmarks/Benchmarks.hpp
+++ b/src/TNL/Benchmarks/Benchmarks.hpp
@@ -25,8 +25,8 @@ namespace Benchmarks {
 
 template< typename Logger >
 Benchmark< Logger >::
-Benchmark( int loops, bool verbose )
-: logger(verbose), loops(loops)
+Benchmark( std::ostream& output, int loops, bool verbose )
+: logger(output, verbose), loops(loops)
 {}
 
 template< typename Logger >
@@ -230,14 +230,6 @@ addErrorMessage( const std::string& message )
    std::cerr << message << std::endl;
 }
 
-template< typename Logger >
-bool
-Benchmark< Logger >::
-save( std::ostream& logFile )
-{
-   return logger.save( logFile );
-}
-
 template< typename Logger >
 auto
 Benchmark< Logger >::
diff --git a/src/TNL/Benchmarks/CustomLogging.h b/src/TNL/Benchmarks/CustomLogging.h
index c09a53268..6d87abc4b 100644
--- a/src/TNL/Benchmarks/CustomLogging.h
+++ b/src/TNL/Benchmarks/CustomLogging.h
@@ -204,17 +204,6 @@ public:
       header_changed = true;
    }
 
-   virtual bool save( std::ostream & logFile ) override
-   {
-      closeTable();
-      logFile << log.str();
-      if( logFile.good() ) {
-         log.str() = "";
-         return true;
-      }
-      return false;
-   }
-
 protected:
    // manual double -> string conversion with fixed precision
    static std::string
@@ -229,8 +218,6 @@ protected:
       return std::string( str.str().data() );
    }
 
-   std::stringstream log;
-
    MetadataColumns metadataColumns;
    std::map< std::string, int > metadataWidths;
    bool header_changed = true;
diff --git a/src/TNL/Benchmarks/JsonLogging.h b/src/TNL/Benchmarks/JsonLogging.h
index 6ff7236a5..db4bc01dd 100644
--- a/src/TNL/Benchmarks/JsonLogging.h
+++ b/src/TNL/Benchmarks/JsonLogging.h
@@ -187,16 +187,6 @@ public:
       header_changed = true;
    }
 
-   virtual bool save( std::ostream & logFile ) override
-   {
-      logFile << log.str();
-      if( logFile.good() ) {
-         log.str() = "";
-         return true;
-      }
-      return false;
-   }
-
 protected:
    // manual double -> string conversion with fixed precision
    static std::string
@@ -211,8 +201,6 @@ protected:
       return std::string( str.str().data() );
    }
 
-   std::stringstream log;
-
    MetadataColumns metadataColumns;
    std::map< std::string, int > metadataWidths;
    bool header_changed = true;
diff --git a/src/TNL/Benchmarks/Logging.h b/src/TNL/Benchmarks/Logging.h
index 7c851ad05..e90c4dad0 100644
--- a/src/TNL/Benchmarks/Logging.h
+++ b/src/TNL/Benchmarks/Logging.h
@@ -19,7 +19,7 @@
 #include <iostream>
 #include <iomanip>
 #include <string>
-#include <sstream>
+#include <fstream>
 
 namespace TNL {
 namespace Benchmarks {
@@ -86,9 +86,23 @@ public:
    using RowElements = LoggingRowElements;
    using WidthHints = std::vector< int >;
 
-   Logging( int verbose = true )
-   : verbose(verbose)
-   {}
+   Logging( std::ostream& log, int verbose = true )
+   : log(log), verbose(verbose)
+   {
+      try {
+         // check if we got an open file
+         std::ofstream& file = dynamic_cast< std::ofstream& >( log );
+         if( file.is_open() )
+            // enable exceptions, but only if we got an open file
+            // (under MPI, only the master rank typically opens the log file and thus
+            // logs from other ranks are ignored here)
+            file.exceptions( std::ostream::failbit | std::ostream::badbit | std::ostream::eofbit );
+      }
+      catch( std::bad_cast& ) {
+         // also enable exceptions if we did not get a file
+         log.exceptions( std::ostream::failbit | std::ostream::badbit | std::ostream::eofbit );
+      }
+   }
 
    void
    setVerbose( int verbose )
@@ -123,9 +137,8 @@ public:
 
    virtual void closeTable() = 0;
 
-   virtual bool save( std::ostream& logFile ) = 0;
-
 protected:
+   std::ostream& log;
    int verbose = 0;
 };
 
-- 
GitLab


From 4551f45ff4e02efdae2dcd2365a4b9060d62208a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 15 Nov 2021 11:50:57 +0100
Subject: [PATCH 32/40] Benchmarks: refactored common methods from
 CustomLogging and JsonLogging into the base class

---
 src/TNL/Benchmarks/CustomLogging.h | 49 -----------------------------
 src/TNL/Benchmarks/JsonLogging.h   | 49 -----------------------------
 src/TNL/Benchmarks/Logging.h       | 50 +++++++++++++++++++++++++++---
 3 files changed, 46 insertions(+), 102 deletions(-)

diff --git a/src/TNL/Benchmarks/CustomLogging.h b/src/TNL/Benchmarks/CustomLogging.h
index 6d87abc4b..dc859f254 100644
--- a/src/TNL/Benchmarks/CustomLogging.h
+++ b/src/TNL/Benchmarks/CustomLogging.h
@@ -49,51 +49,6 @@ public:
          std::cout << std::endl;
    }
 
-   virtual void setMetadataColumns( const MetadataColumns& elements ) override
-   {
-      // check if a header element changed (i.e. a first item of the pairs)
-      if( metadataColumns.size() != elements.size() )
-         header_changed = true;
-      else
-         for( std::size_t i = 0; i < metadataColumns.size(); i++ )
-            if( metadataColumns[ i ].first != elements[ i ].first ) {
-               header_changed = true;
-               break;
-            }
-      metadataColumns = elements;
-   }
-
-   virtual void
-   setMetadataElement( const typename MetadataColumns::value_type & element,
-                       int insertPosition = -1 /* negative values insert from the end */ ) override
-   {
-      bool found = false;
-      for( auto & it : metadataColumns )
-         if( it.first == element.first ) {
-            if( it.second != element.second )
-               it.second = element.second;
-            found = true;
-            break;
-         }
-      if( ! found ) {
-         if( insertPosition < 0 )
-            metadataColumns.insert( metadataColumns.end() + insertPosition + 1, element );
-         else
-            metadataColumns.insert( metadataColumns.begin() + insertPosition, element );
-         header_changed = true;
-      }
-   }
-
-   virtual void
-   setMetadataWidths( const std::map< std::string, int > & widths ) override
-   {
-      for( auto & it : widths )
-         if( metadataWidths.count( it.first ) )
-            metadataWidths[ it.first ] = it.second;
-         else
-            metadataWidths.insert( it );
-   }
-
    void
    writeTableHeader( const std::string & spanningElement,
                      const HeaderElements & subElements )
@@ -217,10 +172,6 @@ protected:
       str << num;
       return std::string( str.str().data() );
    }
-
-   MetadataColumns metadataColumns;
-   std::map< std::string, int > metadataWidths;
-   bool header_changed = true;
 };
 
 } // namespace Benchmarks
diff --git a/src/TNL/Benchmarks/JsonLogging.h b/src/TNL/Benchmarks/JsonLogging.h
index db4bc01dd..948313470 100644
--- a/src/TNL/Benchmarks/JsonLogging.h
+++ b/src/TNL/Benchmarks/JsonLogging.h
@@ -44,51 +44,6 @@ public:
       }
    }
 
-   virtual void setMetadataColumns( const MetadataColumns& elements ) override
-   {
-      // check if a header element changed (i.e. a first item of the pairs)
-      if( metadataColumns.size() != elements.size() )
-         header_changed = true;
-      else
-         for( std::size_t i = 0; i < metadataColumns.size(); i++ )
-            if( metadataColumns[ i ].first != elements[ i ].first ) {
-               header_changed = true;
-               break;
-            }
-      this->metadataColumns = elements;
-   }
-
-   virtual void
-   setMetadataElement( const typename MetadataColumns::value_type & element,
-                       int insertPosition = -1 /* negative values insert from the end */ ) override
-   {
-      bool found = false;
-      for( auto & it : metadataColumns )
-         if( it.first == element.first ) {
-            if( it.second != element.second )
-               it.second = element.second;
-            found = true;
-            break;
-         }
-      if( ! found ) {
-         if( insertPosition < 0 )
-            metadataColumns.insert( metadataColumns.end() + insertPosition + 1, element );
-         else
-            metadataColumns.insert( metadataColumns.begin() + insertPosition, element );
-         header_changed = true;
-      }
-   }
-
-   virtual void
-   setMetadataWidths( const std::map< std::string, int > & widths ) override
-   {
-      for( auto & it : widths )
-         if( metadataWidths.count( it.first ) )
-            metadataWidths[ it.first ] = it.second;
-         else
-            metadataWidths.insert( it );
-   }
-
    void writeHeader( const HeaderElements& headerElements, const WidthHints& widths )
    {
       TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
@@ -200,10 +155,6 @@ protected:
       str << num;
       return std::string( str.str().data() );
    }
-
-   MetadataColumns metadataColumns;
-   std::map< std::string, int > metadataWidths;
-   bool header_changed = true;
 };
 
 } // namespace Benchmarks
diff --git a/src/TNL/Benchmarks/Logging.h b/src/TNL/Benchmarks/Logging.h
index e90c4dad0..88b45d0fa 100644
--- a/src/TNL/Benchmarks/Logging.h
+++ b/src/TNL/Benchmarks/Logging.h
@@ -119,12 +119,50 @@ public:
 
    virtual void writeMetadata( const MetadataMap & metadata ) = 0;
 
-   virtual void setMetadataColumns( const MetadataColumns& elements ) = 0;
+   virtual void setMetadataColumns( const MetadataColumns& elements )
+   {
+      // check if a header element changed (i.e. a first item of the pairs)
+      if( metadataColumns.size() != elements.size() )
+         header_changed = true;
+      else
+         for( std::size_t i = 0; i < metadataColumns.size(); i++ )
+            if( metadataColumns[ i ].first != elements[ i ].first ) {
+               header_changed = true;
+               break;
+            }
+      metadataColumns = elements;
+   }
 
-   virtual void setMetadataElement( const typename MetadataColumns::value_type & element,
-                                    int insertPosition = -1 /* negative values insert from the end */ ) = 0;
+   virtual void
+   setMetadataElement( const typename MetadataColumns::value_type & element,
+                       int insertPosition = -1 /* negative values insert from the end */ )
+   {
+      bool found = false;
+      for( auto & it : metadataColumns )
+         if( it.first == element.first ) {
+            if( it.second != element.second )
+               it.second = element.second;
+            found = true;
+            break;
+         }
+      if( ! found ) {
+         if( insertPosition < 0 )
+            metadataColumns.insert( metadataColumns.end() + insertPosition + 1, element );
+         else
+            metadataColumns.insert( metadataColumns.begin() + insertPosition, element );
+         header_changed = true;
+      }
+   }
 
-   virtual void setMetadataWidths( const std::map< std::string, int > & widths ) = 0;
+   virtual void
+   setMetadataWidths( const std::map< std::string, int > & widths )
+   {
+      for( auto & it : widths )
+         if( metadataWidths.count( it.first ) )
+            metadataWidths[ it.first ] = it.second;
+         else
+            metadataWidths.insert( it );
+   }
 
    virtual void
    logResult( const std::string& performer,
@@ -140,6 +178,10 @@ public:
 protected:
    std::ostream& log;
    int verbose = 0;
+
+   MetadataColumns metadataColumns;
+   std::map< std::string, int > metadataWidths;
+   bool header_changed = true;
 };
 
 } // namespace Benchmarks
-- 
GitLab


From c606749c7d94d80f7b8c308e2ee27f6ba38d0cf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Sun, 14 Nov 2021 22:56:00 +0100
Subject: [PATCH 33/40] Cleaned up benchmarks of ODE solvers

---
 .../ODESolvers/tnl-benchmark-ode-solvers.h    | 32 +++----------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index 18f8d0ce7..354af64fc 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -111,8 +111,9 @@ struct ODESolversBenchmark
         Benchmark<>::MetadataMap metadata,
         const Config::ParameterContainer& parameters )
    {
-      const String name = String( (TNL::MPI::GetSize() > 1) ? "Distributed ODE solvers" : "ODE solvers" );
-                          //+ " (" + parameters.getParameter< String >( "name" ) + "): ";
+      const String precision = getType< Real >();
+      const String name = String( (TNL::MPI::GetSize() > 1) ? "Distributed ODE solvers" : "ODE solvers" ) + " (" + precision + ")";
+      metadata["precision"] = precision;
       benchmark.newBenchmark( name, metadata );
       for( size_t dofs = 25; dofs <= 10000000; dofs *= 2 ) {
          benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
@@ -120,33 +121,10 @@ struct ODESolversBenchmark
             { "DOFs", convertToString( dofs ) },
          } ));
 
-         if( TNL::MPI::GetSize() > 1 )
-            runDistributed( benchmark, metadata, parameters, dofs );
-         else
-            runNonDistributed( benchmark, metadata, parameters, dofs );
+         benchmarkODESolvers< Real, Index >( benchmark, parameters, dofs );
       }
       return true;
    }
-
-   static void
-   runDistributed( Benchmark<>& benchmark,
-                   Benchmark<>::MetadataMap metadata,
-                   const Config::ParameterContainer& parameters,
-                   size_t dofs )
-   {
-      std::cout << "Iterative solvers:" << std::endl;
-      benchmarkODESolvers< Real, Index >( benchmark, parameters, dofs );
-   }
-
-   static void
-   runNonDistributed( Benchmark<>& benchmark,
-                      Benchmark<>::MetadataMap metadata,
-                      const Config::ParameterContainer& parameters,
-                      size_t dofs )
-   {
-      std::cout << "Iterative solvers:" << std::endl;
-      benchmarkODESolvers< Real, Index >( benchmark, parameters, dofs );
-   }
 };
 
 template< typename Real >
@@ -177,7 +155,7 @@ void
 configSetup( Config::ConfigDescription& config )
 {
    config.addDelimiter( "Benchmark settings:" );
-   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-linear-solvers.log");
+   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-ode-solvers.log");
    config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
    config.addEntryEnum( "append" );
    config.addEntryEnum( "overwrite" );
-- 
GitLab


From 0bfcbe164500b9a1c28cba4ba93f0a019601c93e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 15 Nov 2021 08:15:38 +0100
Subject: [PATCH 34/40] Benchmarks: added loops to the logs

---
 src/Benchmarks/SpMV/SpmvBenchmarkResult.h | 6 +++---
 src/TNL/Benchmarks/Benchmarks.h           | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
index 755489e07..128b00334 100644
--- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
+++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h
@@ -44,12 +44,12 @@ struct SpmvBenchmarkResult
 
    virtual HeaderElements getTableHeader() const override
    {
-      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2" });
+      return HeaderElements({ "time", "stddev", "stddev/time", "loops", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2" });
    }
 
    virtual std::vector< int > getColumnWidthHints() const override
    {
-      return std::vector< int >({ 14, 14, 14, 14, 14, 14, 14 });
+      return std::vector< int >({ 14, 14, 14, 6, 14, 14, 14, 14 });
    }
 
    virtual RowElements getRowElements() const override
@@ -59,7 +59,7 @@ struct SpmvBenchmarkResult
       auto diff = csrResult - benchmarkResultCopy;
       RowElements elements;
       // write in scientific format to avoid precision loss
-      elements << std::scientific << time << stddev << stddev/time << bandwidth;
+      elements << std::scientific << time << stddev << stddev/time << loops << bandwidth;
       if( speedup != 0.0 )
          elements << speedup;
       else
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index 164849373..81c334e14 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -38,19 +38,19 @@ struct BenchmarkResult
 
    virtual HeaderElements getTableHeader() const
    {
-      return HeaderElements({ "time", "stddev", "stddev/time", "bandwidth", "speedup" });
+      return HeaderElements({ "time", "stddev", "stddev/time", "loops", "bandwidth", "speedup" });
    }
 
    virtual std::vector< int > getColumnWidthHints() const
    {
-      return std::vector< int >({ 14, 14, 14, 14, 14 });
+      return std::vector< int >({ 14, 14, 14, 6, 14, 14 });
    }
 
    virtual RowElements getRowElements() const
    {
       RowElements elements;
       // write in scientific format to avoid precision loss
-      elements << std::scientific << time << stddev << stddev / time << bandwidth;
+      elements << std::scientific << time << stddev << stddev / time << loops << bandwidth;
       if( speedup != 0 )
          elements << speedup;
       else
-- 
GitLab


From b82fbda3012675ce4abcf6a61b47b1b7d674c105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 15 Nov 2021 11:36:35 +0100
Subject: [PATCH 35/40] Benchmarks: implemented logging of metadata into a
 separate JSON file

Fixes #96
---
 src/Benchmarks/BLAS/tnl-benchmark-blas.h      |  7 ++--
 .../DistSpMV/tnl-benchmark-distributed-spmv.h |  7 ++--
 .../tnl-benchmark-linear-solvers.h            |  7 ++--
 .../NDArray/tnl-benchmark-ndarray-boundary.h  |  7 ++--
 .../NDArray/tnl-benchmark-ndarray.h           |  7 ++--
 .../ODESolvers/tnl-benchmark-ode-solvers.h    |  7 ++--
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h      |  7 ++--
 .../Traversers/tnl-benchmark-traversers.h     | 10 +++++-
 src/TNL/Benchmarks/Utils.h                    | 36 +++++++++++++++++++
 9 files changed, 73 insertions(+), 22 deletions(-)

diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index 169645fbe..a570956da 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -168,11 +168,12 @@ main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    if( precision == "all" || precision == "float" )
       runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index 404214409..ba28f9970 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -333,11 +333,12 @@ main( int argc, char* argv[] )
    if( rank == 0 )
       logFile.open( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index ad2fb38f3..c7f11a50b 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -613,11 +613,12 @@ main( int argc, char* argv[] )
    if( rank == 0 )
       logFile.open( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
index 00aff7527..6e0e807e2 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h
@@ -442,11 +442,12 @@ int main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    const String devices = parameters.getParameter< String >( "devices" );
    if( devices == "all" || devices == "host" )
diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
index 5e31de127..f0d2e1a9e 100644
--- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
+++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h
@@ -430,11 +430,12 @@ int main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    const String devices = parameters.getParameter< String >( "devices" );
    if( devices == "all" || devices == "host" )
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index 354af64fc..156bb80ea 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -222,11 +222,12 @@ main( int argc, char* argv[] )
    if( rank == 0 )
       logFile.open( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    return ! resolveRealTypes( benchmark, metadata, parameters );
 }
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 839c98f5d..b9e34c5aa 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -152,11 +152,12 @@ main( int argc, char* argv[] )
        mode |= std::ios::app;
    std::ofstream logFile( logFileName, mode );
 
-   // init benchmark and common metadata
+   // init benchmark and set parameters
    TNL::Benchmarks::SpMV::BenchmarkType benchmark( logFile, loops, verbose );
 
-   // prepare global metadata
-   Logging::MetadataMap metadata = getHardwareMetadata();
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    // Initiate setup of benchmarks
    if( precision == "all" || precision == "float" )
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 8516107b1..b28382a7f 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -466,9 +466,17 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
        mode |= std::ios::app;
    std::ofstream logFile( logFileName, mode );
 
+   // init benchmark and set parameters
    Benchmark<> benchmark( logFile ); //( loops, verbose );
    benchmark.setup( parameters );
-   Logging::MetadataMap metadata = getHardwareMetadata();
+
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = getHardwareMetadata();
+   metadata["loops"] = convertToString( parameters.getParameter< int >( "loops" ) );
+   metadata["reset"] = convertToString( parameters.getParameter< bool >( "reset" ) );
+   metadata["minimal test time"] = convertToString( parameters.getParameter< double >( "min-time" ) );
+   writeMapAsJson( metadata, logFileName, ".metadata.json" );
+
    runBenchmark< Dimension >( parameters, benchmark, metadata );
 
    return true;
diff --git a/src/TNL/Benchmarks/Utils.h b/src/TNL/Benchmarks/Utils.h
index e60adc229..e1e243437 100644
--- a/src/TNL/Benchmarks/Utils.h
+++ b/src/TNL/Benchmarks/Utils.h
@@ -15,6 +15,8 @@
 
 #include <tuple>
 #include <map>
+#include <fstream>
+#include <experimental/filesystem>
 
 #include <TNL/Timer.h>
 #include <TNL/Devices/Cuda.h>
@@ -146,5 +148,80 @@ inline std::map< std::string, std::string > getHardwareMetadata()
    return metadata;
 }
 
+// Writes the characters of `str` into `out` escaped for use inside a JSON string
+// literal (the surrounding double quotes are not written). Double quotes,
+// backslashes and control characters below 0x20 are escaped, everything else is
+// copied verbatim.
+inline void writeJsonEscaped( std::ostream& out, const std::string& str )
+{
+   static const char hexDigits[] = "0123456789abcdef";
+   for( const char c : str ) {
+      switch( c ) {
+         case '"':  out << "\\\""; break;
+         case '\\': out << "\\\\"; break;
+         case '\b': out << "\\b"; break;
+         case '\f': out << "\\f"; break;
+         case '\n': out << "\\n"; break;
+         case '\r': out << "\\r"; break;
+         case '\t': out << "\\t"; break;
+         default:
+            // remaining control characters must be written as \u00XX
+            if( static_cast< unsigned char >( c ) < 0x20 )
+               out << "\\u00" << hexDigits[ ( c >> 4 ) & 0xF ] << hexDigits[ c & 0xF ];
+            else
+               out << c;
+      }
+   }
+}
+
+// Writes `data` into `out` as a JSON object with one tab-indented
+// "key": "value" member per line (all values are written as JSON strings) and
+// flushes the stream. Keys and values are escaped, so the output is valid JSON
+// even when they contain quotes, backslashes or control characters.
+inline void writeMapAsJson( const std::map< std::string, std::string >& data,
+                            std::ostream& out )
+{
+   out << "{\n";
+   for( auto it = data.begin(); it != data.end(); ) {
+      out << "\t\"";
+      writeJsonEscaped( out, it->first );
+      out << "\": \"";
+      writeJsonEscaped( out, it->second );
+      out << "\"";
+      // increment the iterator now to peek at the next element
+      ++it;
+      // write a comma if there are still elements remaining
+      if( it != data.end() )
+         out << ",";
+      out << "\n";
+   }
+   out << "}\n" << std::flush;
+}
+
+// Writes `data` as a JSON object into the file `filename`. If `newExtension` is
+// not empty, the output path is derived from `filename` by replacing its last
+// extension with `newExtension` (e.g. "dir/log.json" with ".metadata.json" gives
+// "dir/log.metadata.json"). Throws std::ios_base::failure when the file cannot
+// be opened or written.
+inline void writeMapAsJson( const std::map< std::string, std::string >& data,
+                            std::string filename,
+                            std::string newExtension = "" )
+{
+   namespace fs = std::experimental::filesystem;
+
+   if( ! newExtension.empty() ) {
+      const fs::path oldPath = filename;
+      const fs::path newPath = oldPath.parent_path() / ( oldPath.stem().string() + newExtension );
+      // convert explicitly: the implicit conversion of fs::path yields its
+      // native string type, which is not std::string on every platform
+      filename = newPath.string();
+   }
+
+   std::ofstream file( filename );
+   // enable exceptions (this throws immediately if opening the file failed)
+   file.exceptions( std::ostream::failbit | std::ostream::badbit | std::ostream::eofbit );
+   writeMapAsJson( data, file );
+}
+
 } // namespace Benchmarks
 } // namespace TNL
-- 
GitLab


From 1c3b49b42bb57e569f2497d6d41de6e91bec26e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 15 Nov 2021 11:31:15 +0100
Subject: [PATCH 36/40] Benchmarks: removed method newBenchmark and refactored
 logging of remaining metadata into table columns

The global metadata like hardware information is logged into a separate JSON
file, so the handling of MetadataMap in the Logging classes is not
needed anymore.
---
 src/Benchmarks/BLAS/tnl-benchmark-blas.h      | 33 +++++++++----------
 .../DistSpMV/tnl-benchmark-distributed-spmv.h | 22 ++++++-------
 .../tnl-benchmark-linear-solvers.h            | 22 ++++++-------
 .../ODESolvers/tnl-benchmark-ode-solvers.h    | 26 +++++++--------
 src/Benchmarks/SpMV/spmv.h                    |  1 +
 src/Benchmarks/SpMV/tnl-benchmark-spmv.h      | 13 ++------
 .../Traversers/tnl-benchmark-traversers.h     | 15 +++++----
 src/TNL/Benchmarks/Benchmarks.h               |  8 -----
 src/TNL/Benchmarks/Benchmarks.hpp             | 24 --------------
 src/TNL/Benchmarks/CustomLogging.h            | 30 -----------------
 src/TNL/Benchmarks/JsonLogging.h              | 24 --------------
 src/TNL/Benchmarks/Logging.h                  |  7 ----
 12 files changed, 58 insertions(+), 167 deletions(-)

diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index a570956da..67f40f941 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -31,36 +31,33 @@ using namespace TNL::Benchmarks;
 template< typename Real >
 void
 runBlasBenchmarks( Benchmark<> & benchmark,
-                   Benchmark<>::MetadataMap metadata,
                    const std::size_t & minSize,
                    const std::size_t & maxSize,
                    const double & sizeStepFactor )
 {
-   const String precision = getType< Real >();
-   metadata["precision"] = precision;
-
    // Array operations
-   benchmark.newBenchmark( String("Array operations (") + precision + ", host allocator = Host)",
-                           metadata );
+   std::cout << "\n== Array operations ==\n" << std::endl;
    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "precision", getType< Real >() },
+         { "host allocator", "Host" },
          { "size", convertToString( size ) },
       } ));
       benchmarkArrayOperations< Real >( benchmark, size );
    }
 #ifdef HAVE_CUDA
-   benchmark.newBenchmark( String("Array operations (") + precision + ", host allocator = CudaHost)",
-                           metadata );
    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "precision", getType< Real >() },
+         { "host allocator", "CudaHost" },
          { "size", convertToString( size ) },
       } ));
       benchmarkArrayOperations< Real, int, Allocators::CudaHost >( benchmark, size );
    }
-   benchmark.newBenchmark( String("Array operations (") + precision + ", host allocator = CudaManaged)",
-                           metadata );
    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "precision", getType< Real >() },
+         { "host allocator", "CudaManaged" },
          { "size", convertToString( size ) },
       } ));
       benchmarkArrayOperations< Real, int, Allocators::CudaManaged >( benchmark, size );
@@ -68,10 +65,10 @@ runBlasBenchmarks( Benchmark<> & benchmark,
 #endif
 
    // Vector operations
-   benchmark.newBenchmark( String("Vector operations (") + precision + ")",
-                           metadata );
+   std::cout << "\n== Vector operations ==\n" << std::endl;
    for( std::size_t size = minSize; size <= maxSize; size *= sizeStepFactor ) {
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "precision", getType< Real >() },
          { "size", convertToString( size ) },
       } ));
       benchmarkVectorOperations< Real >( benchmark, size );
@@ -79,10 +76,10 @@ runBlasBenchmarks( Benchmark<> & benchmark,
 
    // Triad benchmark: copy from host, compute, copy to host
 #ifdef HAVE_CUDA
-   benchmark.newBenchmark( String("Triad benchmark (") + precision + ")",
-                           metadata );
+   std::cout << "\n== Triad ==\n" << std::endl;
    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "precision", getType< Real >() },
          { "size", convertToString( size ) },
       } ));
       benchmarkTriad< Real >( benchmark, size );
@@ -90,13 +87,13 @@ runBlasBenchmarks( Benchmark<> & benchmark,
 #endif
 
    // Dense matrix-vector multiplication
-   benchmark.newBenchmark( String("Dense matrix-vector multiplication (") + precision + ")",
-                           metadata );
+   std::cout << "\n== Dense matrix-vector multiplication ==\n" << std::endl;
    for( std::size_t rows = 10; rows <= 20000 * 20000; rows *= 2 ) {
       for( std::size_t columns = 10; columns <= 20000 * 20000; columns *= 2 ) {
          if( rows * columns > 20000 * 20000 )
             break;
          benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+            { "precision", getType< Real >() },
             { "rows", convertToString( rows ) },
             { "columns", convertToString( columns ) }
          } ));
@@ -176,9 +173,9 @@ main( int argc, char* argv[] )
    writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    if( precision == "all" || precision == "float" )
-      runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
+      runBlasBenchmarks< float >( benchmark, minSize, maxSize, sizeStepFactor );
    if( precision == "all" || precision == "double" )
-      runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor );
+      runBlasBenchmarks< double >( benchmark, minSize, maxSize, sizeStepFactor );
 
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
index ba28f9970..5329540cb 100644
--- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
+++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h
@@ -157,7 +157,6 @@ struct SpmvBenchmark
 
    static bool
    run( Benchmark<>& benchmark,
-        Benchmark<>::MetadataMap metadata,
         const Config::ParameterContainer& parameters )
    {
       MatrixType matrix;
@@ -169,10 +168,11 @@ struct SpmvBenchmark
       matrix.getCompressedRowLengths( rowLengths );
       const IndexType maxRowLength = max( rowLengths );
 
-      const String name = String( (TNL::MPI::GetSize() > 1) ? "DistSpMV" : "SpMV" )
-                          + " (" + parameters.getParameter< String >( "name" ) + "): ";
-      benchmark.newBenchmark( name, metadata );
+      const String title = (TNL::MPI::GetSize() > 1) ? "DistSpMV" : "SpMV";
+      std::cout << "\n== " << title << " ==\n" << std::endl;
+
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "matrix name", parameters.getParameter< String >( "name" ) },
          // TODO: strip the device
 //         { "matrix type", matrix.getType() },
          { "rows", convertToString( matrix.getRows() ) },
@@ -190,15 +190,15 @@ struct SpmvBenchmark
          MatrixType matrix_perm;
          Matrices::reorderSparseMatrix( matrix, matrix_perm, perm, iperm );
          if( TNL::MPI::GetSize() > 1 )
-            runDistributed( benchmark, metadata, parameters, matrix_perm, vector );
+            runDistributed( benchmark, parameters, matrix_perm, vector );
          else
-            runNonDistributed( benchmark, metadata, parameters, matrix_perm, vector );
+            runNonDistributed( benchmark, parameters, matrix_perm, vector );
       }
       else {
          if( TNL::MPI::GetSize() > 1 )
-            runDistributed( benchmark, metadata, parameters, matrix, vector );
+            runDistributed( benchmark, parameters, matrix, vector );
          else
-            runNonDistributed( benchmark, metadata, parameters, matrix, vector );
+            runNonDistributed( benchmark, parameters, matrix, vector );
       }
 
       return true;
@@ -206,7 +206,6 @@ struct SpmvBenchmark
 
    static void
    runNonDistributed( Benchmark<>& benchmark,
-                      Benchmark<>::MetadataMap metadata,
                       const Config::ParameterContainer& parameters,
                       MatrixType& matrix,
                       VectorType& vector )
@@ -219,7 +218,6 @@ struct SpmvBenchmark
 
    static void
    runDistributed( Benchmark<>& benchmark,
-                   Benchmark<>::MetadataMap metadata,
                    const Config::ParameterContainer& parameters,
                    MatrixType& matrix,
                    VectorType& vector )
@@ -343,12 +341,12 @@ main( int argc, char* argv[] )
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
 //                                         Devices::Host,
-//                                         SpmvBenchmark >( benchmark, metadata, parameters );
+//                                         SpmvBenchmark >( benchmark, parameters );
    using MatrixType = TNL::Matrices::SparseMatrix< double,
                                                    Devices::Host,
                                                    int,
                                                    TNL::Matrices::GeneralMatrix,
                                                    SegmentsType
                                                  >;
-   return ! SpmvBenchmark< MatrixType >::run( benchmark, metadata, parameters );
+   return ! SpmvBenchmark< MatrixType >::run( benchmark, parameters );
 }
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index c7f11a50b..acb02a434 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -338,7 +338,6 @@ struct LinearSolversBenchmark
 
    static bool
    run( Benchmark<>& benchmark,
-        Benchmark<>::MetadataMap metadata,
         const Config::ParameterContainer& parameters )
    {
       const String file_matrix = parameters.getParameter< String >( "input-matrix" );
@@ -381,10 +380,11 @@ struct LinearSolversBenchmark
       matrixPointer->getCompressedRowLengths( rowLengths );
       const IndexType maxRowLength = max( rowLengths );
 
-      const String name = String( (TNL::MPI::GetSize() > 1) ? "Distributed linear solvers" : "Linear solvers" )
-                          + " (" + parameters.getParameter< String >( "name" ) + "): ";
-      benchmark.newBenchmark( name, metadata );
+      const String title = (TNL::MPI::GetSize() > 1) ? "Distributed linear solvers" : "Linear solvers";
+      std::cout << "\n== " << title << " ==\n" << std::endl;
+
       benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
+         { "matrix name", parameters.getParameter< String >( "name" ) },
          // TODO: strip the device
 //         { "matrix type", matrixPointer->getType() },
          { "rows", convertToString( matrixPointer->getRows() ) },
@@ -407,15 +407,15 @@ struct LinearSolversBenchmark
          Matrices::reorderArray( x0, x0_perm, perm );
          Matrices::reorderArray( b, b_perm, perm );
          if( TNL::MPI::GetSize() > 1 )
-            runDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm );
+            runDistributed( benchmark, parameters, matrix_perm, x0_perm, b_perm );
          else
-            runNonDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm );
+            runNonDistributed( benchmark, parameters, matrix_perm, x0_perm, b_perm );
       }
       else {
          if( TNL::MPI::GetSize() > 1 )
-            runDistributed( benchmark, metadata, parameters, matrixPointer, x0, b );
+            runDistributed( benchmark, parameters, matrixPointer, x0, b );
          else
-            runNonDistributed( benchmark, metadata, parameters, matrixPointer, x0, b );
+            runNonDistributed( benchmark, parameters, matrixPointer, x0, b );
       }
 
       return true;
@@ -423,7 +423,6 @@ struct LinearSolversBenchmark
 
    static void
    runDistributed( Benchmark<>& benchmark,
-                   Benchmark<>::MetadataMap metadata,
                    const Config::ParameterContainer& parameters,
                    const SharedPointer< MatrixType >& matrixPointer,
                    const VectorType& x0,
@@ -467,7 +466,6 @@ struct LinearSolversBenchmark
 
    static void
    runNonDistributed( Benchmark<>& benchmark,
-                      Benchmark<>::MetadataMap metadata,
                       const Config::ParameterContainer& parameters,
                       const SharedPointer< MatrixType >& matrixPointer,
                       const VectorType& x0,
@@ -623,12 +621,12 @@ main( int argc, char* argv[] )
    // TODO: implement resolveMatrixType
 //   return ! Matrices::resolveMatrixType< MainConfig,
 //                                         Devices::Host,
-//                                         LinearSolversBenchmark >( benchmark, metadata, parameters );
+//                                         LinearSolversBenchmark >( benchmark, parameters );
    using MatrixType = TNL::Matrices::SparseMatrix< double,
                                                    Devices::Host,
                                                    int,
                                                    TNL::Matrices::GeneralMatrix,
                                                    SegmentsType
                                                  >;
-   return ! LinearSolversBenchmark< MatrixType >::run( benchmark, metadata, parameters );
+   return ! LinearSolversBenchmark< MatrixType >::run( benchmark, parameters );
 }
diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
index 156bb80ea..01b112c2c 100644
--- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
+++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h
@@ -108,16 +108,14 @@ struct ODESolversBenchmark
 
    static bool
    run( Benchmark<>& benchmark,
-        Benchmark<>::MetadataMap metadata,
         const Config::ParameterContainer& parameters )
    {
-      const String precision = getType< Real >();
-      const String name = String( (TNL::MPI::GetSize() > 1) ? "Distributed ODE solvers" : "ODE solvers" ) + " (" + precision + ")";
-      metadata["precision"] = precision;
-      benchmark.newBenchmark( name, metadata );
+      const String title = (TNL::MPI::GetSize() > 1) ? "Distributed ODE solvers" : "ODE solvers";
+      std::cout << "\n== " << title << " ==\n" << std::endl;
+
       for( size_t dofs = 25; dofs <= 10000000; dofs *= 2 ) {
          benchmark.setMetadataColumns( Benchmark<>::MetadataColumns({
-            // TODO: strip the device
+            { "precision", getType< Real >() },
             { "DOFs", convertToString( dofs ) },
          } ));
 
@@ -129,24 +127,22 @@ struct ODESolversBenchmark
 
 template< typename Real >
 bool resolveIndexType( Benchmark<>& benchmark,
-   Benchmark<>::MetadataMap& metadata,
-   Config::ParameterContainer& parameters )
+                       Config::ParameterContainer& parameters )
 {
    const String& index = parameters.getParameter< String >( "index-type" );
-   if( index == "int" ) return ODESolversBenchmark< Real, int >::run( benchmark, metadata, parameters );
-   return ODESolversBenchmark< Real, long int >::run( benchmark, metadata, parameters );
+   if( index == "int" ) return ODESolversBenchmark< Real, int >::run( benchmark, parameters );
+   return ODESolversBenchmark< Real, long int >::run( benchmark, parameters );
 }
 
 bool resolveRealTypes( Benchmark<>& benchmark,
-   Benchmark<>::MetadataMap& metadata,
-   Config::ParameterContainer& parameters )
+                       Config::ParameterContainer& parameters )
 {
    const String& realType = parameters.getParameter< String >( "real-type" );
    if( ( realType == "float" || realType == "all" ) &&
-       ! resolveIndexType< float >( benchmark, metadata, parameters ) )
+       ! resolveIndexType< float >( benchmark, parameters ) )
       return false;
    if( ( realType == "double" || realType == "all" ) &&
-       ! resolveIndexType< double >( benchmark, metadata, parameters ) )
+       ! resolveIndexType< double >( benchmark, parameters ) )
       return false;
    return true;
 }
@@ -229,5 +225,5 @@ main( int argc, char* argv[] )
    std::map< std::string, std::string > metadata = getHardwareMetadata();
    writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
-   return ! resolveRealTypes( benchmark, metadata, parameters );
+   return ! resolveRealTypes( benchmark, parameters );
 }
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 0e57e68c1..dab0d2d93 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -729,6 +729,7 @@ benchmarkSpmv( BenchmarkType& benchmark,
    //
    benchmark.setMetadataColumns({
       { "matrix name", inputFileName },
+      { "precision", getType< Real >() },
       { "rows", convertToString( csrHostMatrix.getRows() ) },
       { "columns", convertToString( csrHostMatrix.getColumns() ) },
       { "nonzeros", convertToString( nonzeros ) },
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index b9e34c5aa..dd2617d3b 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -38,17 +38,10 @@ using namespace TNL::Benchmarks;
 template< typename Real >
 void
 runSpMVBenchmarks( TNL::Benchmarks::SpMV::BenchmarkType & benchmark,
-                   TNL::Benchmarks::SpMV::BenchmarkType::MetadataMap metadata,
                    const String & inputFileName,
                    const Config::ParameterContainer& parameters,
                    bool verboseMR = false )
 {
-   const String precision = getType< Real >();
-   metadata["precision"] = precision;
-
-   // Sparse matrix-vector multiplication
-   benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
-                           metadata );
    // Start the actual benchmark in spmv.h
    try {
       TNL::Benchmarks::SpMV::benchmarkSpmv< Real >( benchmark, inputFileName, parameters, verboseMR );
@@ -161,11 +154,11 @@ main( int argc, char* argv[] )
 
    // Initiate setup of benchmarks
    if( precision == "all" || precision == "float" )
-      runSpMVBenchmarks< float >( benchmark, metadata, inputFileName, parameters, verboseMR );
+      runSpMVBenchmarks< float >( benchmark, inputFileName, parameters, verboseMR );
    if( precision == "all" || precision == "double" )
-      runSpMVBenchmarks< double >( benchmark, metadata, inputFileName, parameters, verboseMR );
+      runSpMVBenchmarks< double >( benchmark, inputFileName, parameters, verboseMR );
 
    // Confirm that the benchmark has finished
-   std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
+   std::cout << "\n==> BENCHMARK FINISHED" << std::endl;
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index b28382a7f..592098b95 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -36,8 +36,7 @@ template< int Dimension,
           typename Real = float,
           typename Index = int >
 bool runBenchmark( const Config::ParameterContainer& parameters,
-                   Benchmark<>& benchmark,
-                   Logging::MetadataMap& metadata )
+                   Benchmark<>& benchmark )
 {
    const std::vector< String >& tests = parameters.getParameter< std::vector< String > >( "tests" );
    // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
@@ -58,7 +57,6 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
    /****
     * Full grid traversing with no boundary conditions
     */
-   benchmark.newBenchmark( String("Traversing without boundary conditions" + convertToString( Dimension ) + "D" ), metadata );
    for( std::size_t size = minSize; size <= maxSize; size *= 2 )
    {
       GridTraversersBenchmark< Dimension, Devices::Host, Real, Index > hostTraverserBenchmark( size );
@@ -78,7 +76,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
       };
 #endif
       benchmark.setMetadataColumns({
-            {"size", convertToString( size ) },
+            { "dimension", convertToString( Dimension ) },
+            { "traverser", "without BC" },
+            { "size", convertToString( size ) },
       });
 
       /****
@@ -260,7 +260,6 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
    /****
     * Full grid traversing including boundary conditions
     */
-   benchmark.newBenchmark( String("Traversing with boundary conditions" + convertToString( Dimension ) + "D" ), metadata );
    for( std::size_t size = minSize; size <= maxSize; size *= 2 )
    {
       GridTraversersBenchmark< Dimension, Devices::Host, Real, Index > hostTraverserBenchmark( size );
@@ -279,7 +278,9 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
 #endif
 
       benchmark.setMetadataColumns({
-            {"size", convertToString( size ) },
+            { "dimension", convertToString( Dimension ) },
+            { "traverser", "with BC" },
+            { "size", convertToString( size ) },
       });
 
       /****
@@ -477,7 +478,7 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
    metadata["minimal test time"] = convertToString( parameters.getParameter< double >( "min-time" ) );
    writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
-   runBenchmark< Dimension >( parameters, benchmark, metadata );
+   runBenchmark< Dimension >( parameters, benchmark );
 
    return true;
 }
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index 81c334e14..979f8b10a 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -64,7 +64,6 @@ class Benchmark
 {
    public:
       using MetadataElement = typename Logger::MetadataElement;
-      using MetadataMap = typename Logger::MetadataMap;
       using MetadataColumns = typename Logger::MetadataColumns;
       using SolverMonitorType = Solvers::IterativeSolverMonitor< double, int >;
 
@@ -80,13 +79,6 @@ class Benchmark
 
       void setMinTime( const double& minTime );
 
-      // Marks the start of a new benchmark
-      void newBenchmark( const String & title );
-
-      // Marks the start of a new benchmark (with custom metadata)
-      void newBenchmark( const String & title,
-                        MetadataMap metadata );
-
       // Sets metadata columns -- values used for all subsequent rows until
       // the next call to this function.
       void setMetadataColumns( const MetadataColumns & metadata );
diff --git a/src/TNL/Benchmarks/Benchmarks.hpp b/src/TNL/Benchmarks/Benchmarks.hpp
index a8ac4888f..9935fd9ae 100644
--- a/src/TNL/Benchmarks/Benchmarks.hpp
+++ b/src/TNL/Benchmarks/Benchmarks.hpp
@@ -68,30 +68,6 @@ setMinTime( const double& minTime )
    this->minTime = minTime;
 }
 
-template< typename Logger >
-void
-Benchmark< Logger >::
-newBenchmark( const String & title )
-{
-   logger.closeTable();
-   logger.writeTitle( title );
-}
-
-template< typename Logger >
-void
-Benchmark< Logger >::
-newBenchmark( const String & title,
-               MetadataMap metadata )
-{
-   logger.closeTable();
-   logger.writeTitle( title );
-   // add loops and reset flag to metadata
-   metadata["loops"] = convertToString(loops);
-   metadata["reset"] = convertToString( reset );
-   metadata["minimal test time"] = convertToString( minTime );
-   logger.writeMetadata( metadata );
-}
-
 template< typename Logger >
 void
 Benchmark< Logger >::
diff --git a/src/TNL/Benchmarks/CustomLogging.h b/src/TNL/Benchmarks/CustomLogging.h
index dc859f254..d734ecbe1 100644
--- a/src/TNL/Benchmarks/CustomLogging.h
+++ b/src/TNL/Benchmarks/CustomLogging.h
@@ -26,29 +26,6 @@ public:
    // inherit constructors
    using Logging::Logging;
 
-   virtual void
-   writeTitle( const std::string & title ) override
-   {
-      if( verbose )
-         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
-      log << ": title = " << title << std::endl;
-   }
-
-   virtual void
-   writeMetadata( const MetadataMap & metadata ) override
-   {
-      if( verbose )
-         std::cout << "properties:" << std::endl;
-
-      for( auto & it : metadata ) {
-         if( verbose )
-            std::cout << "   " << it.first << " = " << it.second << std::endl;
-         log << ": " << it.first << " = " << it.second << std::endl;
-      }
-      if( verbose )
-         std::cout << std::endl;
-   }
-
    void
    writeTableHeader( const std::string & spanningElement,
                      const HeaderElements & subElements )
@@ -152,13 +129,6 @@ public:
       log << message << std::endl;
    }
 
-   virtual void
-   closeTable() override
-   {
-      log << std::endl;
-      header_changed = true;
-   }
-
 protected:
    // manual double -> string conversion with fixed precision
    static std::string
diff --git a/src/TNL/Benchmarks/JsonLogging.h b/src/TNL/Benchmarks/JsonLogging.h
index 948313470..d74ecbdf2 100644
--- a/src/TNL/Benchmarks/JsonLogging.h
+++ b/src/TNL/Benchmarks/JsonLogging.h
@@ -26,24 +26,6 @@ public:
    // inherit constructors
    using Logging::Logging;
 
-   virtual void
-   writeTitle( const std::string & title ) override
-   {
-      if( verbose )
-         std::cout << std::endl << "== " << title << " ==" << std::endl << std::endl;
-   }
-
-   virtual void
-   writeMetadata( const MetadataMap & metadata ) override
-   {
-      if( verbose ) {
-         std::cout << "properties:" << std::endl;
-         for( auto & it : metadata )
-            std::cout << "   " << it.first << " = " << it.second << std::endl;
-         std::cout << std::endl;
-      }
-   }
-
    void writeHeader( const HeaderElements& headerElements, const WidthHints& widths )
    {
       TNL_ASSERT_EQ( headerElements.size(), widths.size(), "elements must have equal sizes" );
@@ -136,12 +118,6 @@ public:
       log << "}" << std::endl;
    }
 
-   virtual void
-   closeTable() override
-   {
-      header_changed = true;
-   }
-
 protected:
    // manual double -> string conversion with fixed precision
    static std::string
diff --git a/src/TNL/Benchmarks/Logging.h b/src/TNL/Benchmarks/Logging.h
index 88b45d0fa..693d02ee7 100644
--- a/src/TNL/Benchmarks/Logging.h
+++ b/src/TNL/Benchmarks/Logging.h
@@ -79,7 +79,6 @@ class Logging
 {
 public:
    using MetadataElement = std::pair< std::string, std::string >;
-   using MetadataMap = std::map< std::string, std::string >;
    using MetadataColumns = std::vector< MetadataElement >;
 
    using HeaderElements = std::vector< std::string >;
@@ -115,10 +114,6 @@ public:
       return verbose;
    }
 
-   virtual void writeTitle( const std::string& title ) = 0;
-
-   virtual void writeMetadata( const MetadataMap & metadata ) = 0;
-
    virtual void setMetadataColumns( const MetadataColumns& elements )
    {
       // check if a header element changed (i.e. a first item of the pairs)
@@ -173,8 +168,6 @@ public:
 
    virtual void writeErrorMessage( const std::string& message ) = 0;
 
-   virtual void closeTable() = 0;
-
 protected:
    std::ostream& log;
    int verbose = 0;
-- 
GitLab


From e8bfeffb5559ea2f4687cf3c3d3244519f3c3ddc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Mon, 15 Nov 2021 13:42:53 +0100
Subject: [PATCH 37/40] Benchmarks: switch to JSON by default and adjust
 metadata column widths appropriately

---
 src/Benchmarks/BLAS/tnl-benchmark-blas.h | 6 ++++++
 src/Benchmarks/SpMV/spmv.h               | 2 +-
 src/TNL/Benchmarks/Benchmarks.h          | 4 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
index 67f40f941..ca9ffbb00 100644
--- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h
+++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h
@@ -35,6 +35,12 @@ runBlasBenchmarks( Benchmark<> & benchmark,
                    const std::size_t & maxSize,
                    const double & sizeStepFactor )
 {
+   benchmark.setMetadataWidths({
+      { "operation", 30 },
+      { "performer", 21 },
+      { "precision", 10 },
+   });
+
    // Array operations
    std::cout << "\n== Array operations ==\n" << std::endl;
    for( std::size_t size = minSize; size <= maxSize; size *= 2 ) {
diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index dab0d2d93..ff7fecfbb 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -738,7 +738,7 @@ benchmarkSpmv( BenchmarkType& benchmark,
    });
    benchmark.setMetadataWidths({
       { "matrix name", 32 },
-      { "format", 35 },
+      { "format", 46 },
    });
 
    HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index 979f8b10a..d93505bd2 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -13,7 +13,7 @@
 
 #pragma once
 
-#include "CustomLogging.h"
+#include "JsonLogging.h"
 
 #include <limits>
 
@@ -59,7 +59,7 @@ struct BenchmarkResult
    }
 };
 
-template< typename Logger = CustomLogging >
+template< typename Logger = JsonLogging >
 class Benchmark
 {
    public:
-- 
GitLab


From f07915f13f06bcf63c68b3b534a80d85f33426cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 17 Nov 2021 16:04:16 +0100
Subject: [PATCH 38/40] Refactored setters/getters in the Benchmark class

---
 src/TNL/Benchmarks/Benchmarks.h   |  8 +++-----
 src/TNL/Benchmarks/Benchmarks.hpp | 18 +++++++++---------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/TNL/Benchmarks/Benchmarks.h b/src/TNL/Benchmarks/Benchmarks.h
index d93505bd2..4036fa6d4 100644
--- a/src/TNL/Benchmarks/Benchmarks.h
+++ b/src/TNL/Benchmarks/Benchmarks.h
@@ -73,11 +73,11 @@ class Benchmark
 
       void setup( const Config::ParameterContainer& parameters );
 
-      // TODO: ensure that this is not called in the middle of the benchmark
-      // (or just remove it completely?)
       void setLoops( int loops );
 
-      void setMinTime( const double& minTime );
+      void setMinTime( double minTime );
+
+      bool isResetingOn() const;
 
       // Sets metadata columns -- values used for all subsequent rows until
       // the next call to this function.
@@ -143,8 +143,6 @@ class Benchmark
 
       double getBaseTime() const;
 
-      bool isResetingOn() const;
-
    protected:
       Logger logger;
 
diff --git a/src/TNL/Benchmarks/Benchmarks.hpp b/src/TNL/Benchmarks/Benchmarks.hpp
index 9935fd9ae..8aa3ae9c1 100644
--- a/src/TNL/Benchmarks/Benchmarks.hpp
+++ b/src/TNL/Benchmarks/Benchmarks.hpp
@@ -63,11 +63,19 @@ setLoops( int loops )
 template< typename Logger >
 void
 Benchmark< Logger >::
-setMinTime( const double& minTime )
+setMinTime( double minTime )
 {
    this->minTime = minTime;
 }
 
+template< typename Logger >
+bool
+Benchmark< Logger >::
+isResetingOn() const
+{
+   return reset;
+}
+
 template< typename Logger >
 void
 Benchmark< Logger >::
@@ -222,13 +230,5 @@ getBaseTime() const
    return baseTime;
 }
 
-template< typename Logger >
-bool
-Benchmark< Logger >::
-isResetingOn() const
-{
-   return reset;
-}
-
 } // namespace Benchmarks
 } // namespace TNL
-- 
GitLab


From b0016afe242d917533cd4b520f3cde9477c1ac26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 17 Nov 2021 17:03:43 +0100
Subject: [PATCH 39/40] Python: added modules for benchmark scripts based on
 JSON and a general post-processing script

---
 src/Python/BenchmarkLogs.py        | 116 ++++++++++++++++++++++++++
 src/Python/BenchmarkPlots.py       | 129 +++++++++++++++++++++++++++++
 src/Python/CMakeLists.txt          |   2 +
 src/Tools/CMakeLists.txt           |   1 +
 src/Tools/tnl-benchmark-to-html.py |  93 +++++++++++++++++++++
 5 files changed, 341 insertions(+)
 create mode 100644 src/Python/BenchmarkLogs.py
 create mode 100644 src/Python/BenchmarkPlots.py
 create mode 100755 src/Tools/tnl-benchmark-to-html.py

diff --git a/src/Python/BenchmarkLogs.py b/src/Python/BenchmarkLogs.py
new file mode 100644
index 000000000..8c9204098
--- /dev/null
+++ b/src/Python/BenchmarkLogs.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python3
+
+__all__ = [
+    "dict_to_html_table", "get_benchmark_metadata",
+    "get_benchmark_dataframe",
+    "gen_dataframes_per_operation",  # needed by tnl-benchmark-to-html.py
+]
+
+import os.path
+import json
+import pandas
+
+def dict_to_html_table(data):
+    """Render `data` as a two-column HTML table with rows sorted by key."""
+    rows = [
+        f"\t<tr><td>{key}</td><td>{data[key]}</td></tr>\n"
+        for key in sorted(data.keys())
+    ]
+    parts = ["<table border=1>\n", "<tbody>\n", *rows, "</tbody>\n", "</table>\n"]
+    return "".join(parts)
+
+def get_benchmark_metadata(filename):
+    """Reads metadata of the benchmark in the given file.
+
+    :param str filename: path of the file with metadata or benchmark results.
+        - If it ends with ".metadata.json", metadata is read from that file.
+        - Otherwise, the extension is first replaced with ".metadata.json".
+    :returns: dict as returned by json.load, or None if the file does not exist.
+    """
+    if not filename.endswith(".metadata.json"):
+        filename = os.path.splitext(filename)[0] + ".metadata.json"
+    if os.path.isfile(filename):
+        print(f"Parsing metadata from file {filename}")
+        with open(filename, "r") as f:  # close the handle deterministically
+            return json.load(f)
+    print(f"Metadata file {filename} does not exist")
+    return None
+
+def get_benchmark_dataframe(logFile):
+    """
+    Get pandas dataframe with benchmark results stored in the given log file.
+
+    :param logFile: path to the log file (one JSON record per line)
+    :returns: pandas.DataFrame instance
+    """
+    print(f"Parsing input file {logFile}")
+    with open(logFile, "r") as f:  # close the handle deterministically
+        df = pandas.read_json(f, orient="records", lines=True)
+
+    # convert "N/A" in the speedup column to nan
+    if "speedup" in df.columns:
+        df["speedup"] = pandas.to_numeric(df["speedup"], errors="coerce")
+    return df
+
+def gen_dataframes_per_operation(logFile, header_elements=None):
+    """
+    Reads benchmark results stored in the given log file and splits them into
+    multiple dataframes according to the "operation" column.
+
+    Various post-processing steps are done on each partial dataframe:
+    - columns with only NaN values are removed
+    - the operation column is removed
+    - the "index" and "columns" of the dataframe are set:
+        - if header_elements are given, they are set as "columns" and everything
+          else is used for the index
+        - otherwise, all columns in the dataframe before "time" are used for
+          the index, and the remaining columns (starting with "time") stay as
+          "columns"
+    - the "performer" column is set as the last column of the index
+    - note that the index is not explicitly sorted, so data is ordered as in the
+      input file
+
+    :param logFile: path to the log file
+    :param header_elements: optional list of the column names that stay as
+        "columns"; if None, it is derived from the position of the "time" column
+    :yields: pairs of (str, pandas.DataFrame) object, where the str denotes the
+             particular operation name
+    """
+    main_df = get_benchmark_dataframe(logFile)
+
+    # check if there is at least one operation
+    if "operation" not in main_df.columns:
+        yield "Dummy operation", main_df
+        return
+
+    # extract all benchmark operations, preserve their order as found in the dataframe
+    operations = list(dict.fromkeys(main_df["operation"]))
+
+    # set operation as index
+    main_df = main_df.set_index("operation")
+
+    # if header_elements was not provided, we assume that "time" and all following columns
+    # are benchmark results, and all preceding columns are metadata columns that will be
+    # set as index of the dataframe
+    if header_elements is None:
+        header_elements = list(main_df.columns)
+        header_elements = header_elements[header_elements.index("time"):]
+        # FIXME: the "rows" and "columns" (in the gemv operation) are parsed after the correct header elements, because the preceding operations don't have these metadata columns
+        # TODO: each benchmark should record the header elements in the metadata file
+        header_elements = [e for e in header_elements if e not in ["rows", "columns"]]
+
+    # emit one df per operation
+    for op in operations:
+        # list selection always gives a DataFrame, even for an operation with one row
+        df = main_df.loc[[op]]
+        # remove columns with only NaNs
+        df = df.dropna(axis=1, how="all")
+        # remove the operation column (index)
+        df = df.reset_index(drop=True)
+        # prepare index_columns and make sure that performer is the last
+        index_columns = [c for c in df.columns if c not in header_elements and c != "performer"]
+        index_columns.append("performer")
+        # set new index for the df: all columns except header_elements
+        df = df.set_index(index_columns)
+        # emit a pair (op, df)
+        yield op, df
diff --git a/src/Python/BenchmarkPlots.py b/src/Python/BenchmarkPlots.py
new file mode 100644
index 000000000..19a6e94f6
--- /dev/null
+++ b/src/Python/BenchmarkPlots.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python3
+
+__all__ = [
+    "plot_bandwidth_vs_size",
+    "heatmaps_bandwidth",
+    "get_image_html_tag",
+]
+
+import numpy
+import matplotlib.pyplot as plt
+from cycler import cycler
+import io
+import base64
+
+custom_cycler = cycler(linestyle=["-", "--", ":", "-."]) * cycler("color", ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"])
+
+def plot_bandwidth_vs_size(df, size_name="size", prop_cycler=custom_cycler, **kwargs):
+    """
+    Creates a bandwidth-size plot. The "size" data are expected in the index of
+    the dataframe, all other columns of the index are used for labels of the
+    graph lines.
+
+    :param df: a pandas.DataFrame instance
+    :param size_name: name of the "size" column in the index
+    :param prop_cycler:
+        property cycler for the graph lines, see the documentation for details:
+        https://matplotlib.org/stable/tutorials/intermediate/color_cycle.html
+    :param kwargs:
+        optional keyword arguments passed to matplotlib's errorbar function
+    :returns: a tuple (fig, ax) as returned by plt.subplots()
+    """
+    # prepare the dataframe
+    assert "bandwidth" in df.columns
+    assert size_name in df.index.names
+    df = df.reset_index(level=size_name).sort_index()
+
+    # set default parameters for the plot
+    kwargs.setdefault("capsize", 4)
+    # plot the graph
+    fig, ax = plt.subplots()
+    ax.set_xlabel(size_name)
+    ax.set_ylabel("bandwidth [GiB/s]")
+    ax.set_prop_cycle(prop_cycler)
+    for idx in df.index.unique():
+        part = df.loc[idx]
+        err = part["bandwidth"] * part["stddev/time"]
+        # idx is a tuple only for a MultiIndex; joining a plain str would split it into characters
+        label = ", ".join(map(str, idx)) if isinstance(idx, tuple) else str(idx)
+        ax.errorbar(part[size_name], part["bandwidth"], yerr=err, label=label, **kwargs)
+    # see https://stackoverflow.com/a/43439132
+    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left", borderaxespad=0.)
+    return fig, ax
+
+def heatmaps_bandwidth(df, x_name="columns", y_name="rows", *, cbar_kw=None, **kwargs):
+    """
+    Creates heatmaps of two-dimensional bandwidth data. The "size" data (i.e.
+    x_name and y_name) are expected in the index of the dataframe, all other
+    columns of the index are used to label the heatmaps. Heatmaps are generated
+    using the Python generator interface for each unique tuple of dataframe
+    index values.
+
+    :param df: a pandas.DataFrame instance
+    :param x_name: name of the column in the index to map along the x-axis
+    :param y_name: name of the column in the index to map along the y-axis
+    :param cbar_kw:
+        optional dict of arguments passed to matplotlib's colorbar function
+    :param kwargs:
+        optional keyword arguments passed to matplotlib's imshow function
+    :yields: tuples (fig, ax) as returned by plt.subplots(), one per heatmap
+    """
+    # prepare the dataframe
+    assert "bandwidth" in df.columns
+    assert x_name in df.index.names
+    assert y_name in df.index.names
+    df = df.reset_index(level=[x_name, y_name]).sort_index()
+
+    if cbar_kw is None:
+        cbar_kw = {}
+
+    for idx in df.index.unique():
+        # drop the index
+        part = df.loc[idx].reset_index(drop=True)
+        # get just the data we need
+        part = part[[x_name, y_name, "bandwidth"]].set_index([y_name, x_name])
+        # convert to a 2D array
+        bandwidth = part.stack().unstack(level=x_name)
+        # remove the column full of "bandwidth" from the index
+        bandwidth = bandwidth.reset_index(level=1, drop=True)
+
+        # figure setup
+        fig, ax = plt.subplots()
+        ax.set_xlabel(x_name)
+        ax.set_ylabel(y_name)
+        label = ", ".join(map(str, idx)) if isinstance(idx, tuple) else str(idx)
+        ax.set_title(f"{label} bandwidth [GiB/s]")
+
+        # plot the heatmap and colorbar
+        im = ax.imshow(bandwidth, interpolation=None, **kwargs)
+        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
+        cbar.ax.set_ylabel("bandwidth", rotation=-90, va="bottom")
+
+        # set ticks and their labels (real lists, not one-shot generators)
+        ax.set_xticks(numpy.arange(len(bandwidth.columns)))
+        ax.set_yticks(numpy.arange(len(bandwidth.index)))
+        ax.set_xticklabels([int(n) for n in bandwidth.columns])
+        ax.set_yticklabels([int(n) for n in bandwidth.index])
+
+        # rotate xtick labels and set their alignment
+        plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+
+        yield fig, ax
+
+def get_image_html_tag(fig, format="svg"):
+    """
+    Serializes the figure and returns HTML markup embedding it.
+
+    :param fig: a matplotlib figure instance
+    :param format: output image format (passed to fig.savefig)
+    :returns: the decoded SVG text for "svg", otherwise a base64 <img> tag
+    """
+    buffer = io.BytesIO()
+    # bbox_inches="tight" grows the canvas so that a legend placed outside the
+    # plot is not cropped, see https://stackoverflow.com/a/43439132
+    fig.savefig(buffer, format=format, bbox_inches="tight")
+    raw = buffer.getvalue()
+    if format != "svg":
+        encoded = base64.b64encode(raw).decode("utf-8")
+        return f"<img src=\"data:image/{format};base64,{encoded}\">"
+    return raw.decode("utf-8")
diff --git a/src/Python/CMakeLists.txt b/src/Python/CMakeLists.txt
index 505e5f194..87f2c9cc1 100644
--- a/src/Python/CMakeLists.txt
+++ b/src/Python/CMakeLists.txt
@@ -6,6 +6,8 @@ set( PYTHON_SITE_PACKAGES_DIR lib/python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION
 if( PYTHONINTERP_FOUND )
    CONFIGURE_FILE( "__init__.py.in" "__init__.py" )
    INSTALL( FILES ${CMAKE_CURRENT_BINARY_DIR}/__init__.py
+                  BenchmarkLogs.py
+                  BenchmarkPlots.py
                   LogParser.py
             DESTINATION ${PYTHON_SITE_PACKAGES_DIR}/TNL )
 endif()
diff --git a/src/Tools/CMakeLists.txt b/src/Tools/CMakeLists.txt
index deb03b475..84a05dd9c 100644
--- a/src/Tools/CMakeLists.txt
+++ b/src/Tools/CMakeLists.txt
@@ -79,5 +79,6 @@ INSTALL( TARGETS tnl-init
          DESTINATION bin )
 
 INSTALL( PROGRAMS tnl-err2eoc
+                  tnl-benchmark-to-html.py
                   tnl-log-to-html.py
          DESTINATION bin )
diff --git a/src/Tools/tnl-benchmark-to-html.py b/src/Tools/tnl-benchmark-to-html.py
new file mode 100755
index 000000000..70c793491
--- /dev/null
+++ b/src/Tools/tnl-benchmark-to-html.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python3
+
+import sys
+import os.path
+import matplotlib.pyplot as plt
+
+from TNL.BenchmarkLogs import dict_to_html_table, get_benchmark_metadata, gen_dataframes_per_operation
+from TNL.BenchmarkPlots import plot_bandwidth_vs_size, heatmaps_bandwidth, get_image_html_tag
+
+if not 2 <= len(sys.argv) <= 3:
+    print(f"""\
+usage: {sys.argv[0]} FILE.log [OUTPUT.html]
+
+where FILE.log contains one JSON record per line,
+and OUTPUT.html is the output file name (by default, OUTPUT=FILE).
+""", file=sys.stderr)
+    sys.exit(1)
+
+logFile = sys.argv[1]
+htmlFile = (  # defaults to the input name with its extension swapped for .html
+    sys.argv[2] if len(sys.argv) > 2
+    else os.path.splitext(logFile)[0] + ".html"
+)
+
+
+metadata = get_benchmark_metadata(logFile)
+# fall back to the log file's base name when the metadata carries no title
+title = os.path.splitext(os.path.basename(logFile))[0]
+if metadata is not None and "title" in metadata:
+    title = metadata["title"]
+dataframes = list(gen_dataframes_per_operation(logFile))
+
+print(f"Writing output to {htmlFile}")
+with open(htmlFile, 'w', encoding="utf-8") as f:  # must match the <meta charset> below
+    print("<html>", file=f)
+    # add some basic style
+    print("""\
+<head>
+<meta charset="UTF-8">
+<style>
+    h1, h2 { border-bottom: solid 1px lightgray; }
+    table { border-collapse: collapse; }
+    table.benchmark td { text-align: end; }
+    th, td { padding: 2px; }
+</style>
+</head>
+<body>""", file=f)
+
+    print(f"<h1>{title}</h1>", file=f)
+    if metadata is not None:
+        print(dict_to_html_table(metadata), file=f)
+
+    # create a TOC; each entry links to the anchor of the operation's section
+    print("<h2>Table of contents</h2>", file=f)
+    print("<ol>", file=f)
+    for op, df in dataframes:
+        anchor = op.replace(" ", "_")
+        print(f"<li><a href=\"#{anchor}\">{op}</a></li>", file=f)
+    print("</ol>", file=f)
+
+    # formatters for specific columns of the table
+    formatters = {
+        "stddev": lambda value: f"{value:e}",
+        "bandwidth": lambda value: f"{value:.3f}",
+        "speedup": lambda value: f"{value:.3f}",
+    }
+
+    for op, df in dataframes:
+        # section heading (the anchor must match the one used in the TOC)
+        anchor = op.replace(" ", "_")
+        print(f"<h2 id=\"{anchor}\">{op}</h2>", file=f)
+        # table
+        print(df.to_html(classes="benchmark", formatters=formatters), file=f)
+
+        # graphs: "size" takes precedence over "DOFs" as the x-axis
+        size_name = None
+        if "size" in df.index.names:
+            size_name = "size"
+        elif "DOFs" in df.index.names:
+            size_name = "DOFs"
+        if size_name is not None:
+            fig, ax = plot_bandwidth_vs_size(df, size_name)
+            print(get_image_html_tag(fig, format="png"), file=f)
+            plt.close(fig)
+
+        # heatmaps
+        if "rows" in df.index.names and "columns" in df.index.names:
+            for fig, ax in heatmaps_bandwidth(df):
+                print(get_image_html_tag(fig, format="png"), file=f)
+                plt.close(fig)
+
+    print("</body>", file=f)
+    print("</html>", file=f)
-- 
GitLab


From 521dd4afa16dfbc53329244dd3e51ae063005f0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz>
Date: Wed, 17 Nov 2021 16:15:07 +0100
Subject: [PATCH 40/40] Python: marked the LogParser module and
 tnl-log-to-html.py script as deprecated

---
 src/Python/LogParser.py      | 5 +++++
 src/Tools/tnl-log-to-html.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/src/Python/LogParser.py b/src/Python/LogParser.py
index 0c327d71c..e3c9c672a 100644
--- a/src/Python/LogParser.py
+++ b/src/Python/LogParser.py
@@ -1,5 +1,10 @@
 #!/usr/bin/env python3
 
+import warnings
+warnings.warn("The CustomLogging format for TNL benchmarks is deprecated. Please switch your benchmark "
+              "to JsonLogging and use the tnl-benchmark-to-html.py script for post-processing.",
+              DeprecationWarning)
+
 import collections
 
 try:
diff --git a/src/Tools/tnl-log-to-html.py b/src/Tools/tnl-log-to-html.py
index bb9577298..a42c417c2 100755
--- a/src/Tools/tnl-log-to-html.py
+++ b/src/Tools/tnl-log-to-html.py
@@ -1,5 +1,10 @@
 #!/usr/bin/env python3
 
+import warnings
+warnings.warn("The CustomLogging format for TNL benchmarks is deprecated. Please switch your benchmark "
+              "to JsonLogging and use the tnl-benchmark-to-html.py script for post-processing.",
+              DeprecationWarning)
+
 import sys
 
 from TNL.LogParser import LogParser
-- 
GitLab