From 524483f7ab1a57a8f3a99c9ed52fb741c3ca4641 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Fri, 21 Dec 2018 19:57:15 +0100
Subject: [PATCH] Added script for running traversers benchmark. Fixing
 traversers benchmark.

---
 src/Benchmarks/Benchmarks.h                   |   2 +-
 .../Traversers/GridTraversersBenchmark.h      | 137 ++++++++++++++++++
 .../Traversers/tnl-benchmark-traversers.h     |   1 +
 .../scripts/run-tnl-benchmark-traversers      |   5 +
 4 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 src/Benchmarks/Traversers/GridTraversersBenchmark.h
 create mode 100644 src/Benchmarks/scripts/run-tnl-benchmark-traversers

diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 39973d0baa..13ba3a6d16 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -56,7 +56,7 @@ timeFunction( ComputeFunction compute,
 
    int i;
    for( i = 0;
-        i < loops && ( ! minTime || timer.getRealTime() < ( double ) minTime );
+        i < loops || timer.getRealTime() < ( double ) minTime;
         ++i) 
    {
       // abuse the monitor's "time" for loops
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
new file mode 100644
index 0000000000..3302c4cb94
--- /dev/null
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
@@ -0,0 +1,137 @@
+/***************************************************************************
+                          GridTraversersBenchmark.h  -  description
+                             -------------------
+    begin                : Dec 19, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber
+
+#pragma once
+
+#include <TNL/ParallelFor.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Benchmarks {
+      
+
+// Primary template: an empty class. The partial specializations for
+// Dimension 1, 2 and 3 further down provide the constructor and writeOne().
+template< int Dimension, typename Device, typename Real, typename Index >
+class GridTraversersBenchmark
+{};
+
+// 1D benchmark: holds a Vector constructed with `size`, filled by writeOne().
+template< typename Device,
+          typename Real,
+          typename Index >
+class GridTraversersBenchmark< 1, Device, Real, Index >
+{
+   public:
+
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // Initializers are listed in declaration order (size, then v), which is
+      // the order in which C++ initializes the members regardless of the list.
+      GridTraversersBenchmark( Index size )
+      : size( size ), v( size ) {}
+
+      // Runs f over the range ( 0, size ) given to ParallelFor; f stores i in data[ i ].
+      void writeOne()
+      {
+         auto f = [] __cuda_callable__ ( Index i, Real* data )
+         {
+            data[ i ] = i;
+         };
+         ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() );
+      }
+
+   protected:
+      Index size;
+      Vector v;
+};
+
+
+// 2D benchmark: a size x size block of values kept in a single Vector.
+template< typename Device, typename Real, typename Index >
+class GridTraversersBenchmark< 2, Device, Real, Index >
+{
+   public:
+
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // `size` is the edge length; the Vector is constructed with size * size.
+      GridTraversersBenchmark( Index size )
+      : size( size ),
+        v( size * size )
+      {}
+
+      // Writes i + j into element i * size + j for every ( i, j ) that
+      // ParallelFor2D passes to the lambda for the range ( 0, 0 ) .. ( size, size ).
+      void writeOne()
+      {
+         // Local copy of the edge length, captured by value - presumably so the
+         // device lambda does not have to dereference `this` (TODO confirm).
+         Index edge = size;
+         auto writeEntry = [=] __cuda_callable__ ( Index i, Index j, Real* data )
+         {
+            const Index offset = i * edge + j;
+            data[ offset ] = i + j;
+         };
+         ParallelFor2D< Device >::exec( static_cast< Index >( 0 ), static_cast< Index >( 0 ),
+                                        size, size, writeEntry, v.getData() );
+      }
+
+   protected:
+      Index size;
+      Vector v;
+};
+
+// 3D benchmark: a size x size x size block of values kept in a single Vector.
+template< typename Device, typename Real, typename Index >
+class GridTraversersBenchmark< 3, Device, Real, Index >
+{
+   public:
+
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // `size` is the edge length; the Vector is constructed with size * size * size.
+      GridTraversersBenchmark( Index size )
+      : size( size ), v( size * size * size )
+      {}
+
+      // Writes i + j + k into element ( i * size + j ) * size + k for every
+      // ( i, j, k ) that ParallelFor3D passes to the lambda.
+      void writeOne()
+      {
+         // Local copy of the edge length, captured by value - presumably so the
+         // device lambda does not have to dereference `this` (TODO confirm).
+         Index edge = size;
+         auto writeEntry = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data )
+         {
+            const Index row = i * edge + j;
+            data[ row * edge + k ] = i + j + k;
+         };
+         ParallelFor3D< Device >::exec(
+            static_cast< Index >( 0 ), static_cast< Index >( 0 ), static_cast< Index >( 0 ),
+            size, size, size,
+            writeEntry, v.getData() );
+      }
+
+   protected:
+      Index size;
+      Vector v;
+};
+
+
+   } // namespace Benchmarks
+} // namespace TNL
+
+
+
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 3e13d52dd9..9b69a31639 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -107,6 +107,7 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
    
 
    Benchmark benchmark; //( loops, verbose );
+   benchmark.setup( parameters );
    Benchmark::MetadataMap metadata = getHardwareMetadata();
    runBenchmark< Dimension >( parameters, benchmark, metadata );
    
diff --git a/src/Benchmarks/scripts/run-tnl-benchmark-traversers b/src/Benchmarks/scripts/run-tnl-benchmark-traversers
new file mode 100644
index 0000000000..00cd1e1ac6
--- /dev/null
+++ b/src/Benchmarks/scripts/run-tnl-benchmark-traversers
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Runs tnl-benchmark-traversers for dimensions 1, 2 and 3; the 2D and 3D runs pass --output-mode append.
+tnl-benchmark-traversers --dimension 1 --loops 1 --min-size 16 --max-size 100000 --min-time 1
+tnl-benchmark-traversers --dimension 2 --loops 1 --min-size 16 --max-size 10000 --min-time 1 --output-mode append
+tnl-benchmark-traversers --dimension 3 --loops 1 --min-size 16 --max-size 1000 --min-time 1 --output-mode append
-- 
GitLab