diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h
index 39973d0baa363935cdd9172c058923bcf550d6f1..13ba3a6d16c33391600037ee530d2172eb1d88e5 100644
--- a/src/Benchmarks/Benchmarks.h
+++ b/src/Benchmarks/Benchmarks.h
@@ -56,7 +56,7 @@ timeFunction( ComputeFunction compute,
 
    int i;
    for( i = 0;
-        i < loops && ( ! minTime || timer.getRealTime() < ( double ) minTime );
+        i < loops || timer.getRealTime() < ( double ) minTime;
         ++i) 
    {
       // abuse the monitor's "time" for loops
diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
new file mode 100644
index 0000000000000000000000000000000000000000..3302c4cb9407eae7d1feb4fe6b49f6ebc18adaff
--- /dev/null
+++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h
@@ -0,0 +1,137 @@
+/***************************************************************************
+                          GridTraversersBenchmark.h  -  description
+                             -------------------
+    begin                : Dec 19, 2018
+    copyright            : (C) 2018 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+// Implemented by: Tomas Oberhuber
+
+#pragma once
+
+#include <TNL/ParallelFor.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Containers/Vector.h>
+
+namespace TNL {
+   namespace Benchmarks {
+      
+
+template< int Dimension,      // grid dimension; specialized below for 1, 2 and 3
+          typename Device,    // device tag handed to Containers::Vector and ParallelFor* by the specializations
+          typename Real,      // element type of the benchmark vector
+          typename Index >    // index type used for sizes and loop bounds
+class GridTraversersBenchmark{}; // primary template is empty; the work is in the specializations
+
+// Specialization for dimension 1: owns a vector built from `size` and fills it via ParallelFor.
+template< typename Device, typename Real, typename Index >
+class GridTraversersBenchmark< 1, Device, Real, Index >
+{
+   public:
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // Stores the element count and constructs the vector with it. The initializers
+      // follow the member declaration order (size, then v), as in the 2D/3D specializations.
+      GridTraversersBenchmark( Index size )
+      : size( size ), v( size )
+      {}
+
+      // Runs f through ParallelFor with bounds 0 and size; f writes the index i to data[ i ].
+      void writeOne()
+      {
+         // Captureless lambda: `data` is presumably forwarded from the extra exec argument.
+         auto f = [] __cuda_callable__ ( Index i, Real* data )
+         {
+            data[ i ] = i;
+         };
+
+         ParallelFor< Device >::exec( ( Index ) 0, size, f, v.getData() );
+      }
+
+   protected:
+      Index size;   // value passed to the constructor; upper bound given to ParallelFor
+      Vector v;     // constructed with `size`
+};
+
+
+// Specialization for dimension 2: owns a vector built from size * size, filled via ParallelFor2D.
+template< typename Device, typename Real, typename Index >
+class GridTraversersBenchmark< 2, Device, Real, Index >
+{
+   public:
+
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // Stores the edge length and constructs the vector with size * size.
+      GridTraversersBenchmark( Index size )
+      : size( size ),
+        v( size * size )
+      {}
+
+      // Runs a kernel through ParallelFor2D that writes outer + inner to out[ outer * size + inner ].
+      void writeOne()
+      {
+         // Local copy of the member, so the by-value capture holds a plain Index
+         // and the kernel does not go through `this`.
+         Index edge = size;
+         auto writeSum = [=] __cuda_callable__ ( Index outer, Index inner, Real* out )
+         {
+            out[ outer * edge + inner ] = outer + inner;
+         };
+
+         ParallelFor2D< Device >::exec( static_cast< Index >( 0 ), static_cast< Index >( 0 ),
+                                        size, size,
+                                        writeSum, v.getData() );
+      }
+
+   protected:
+      Index size;   // edge length passed to the constructor
+      Vector v;     // constructed with size * size
+};
+
+// Specialization for dimension 3: owns a vector built from size^3, filled via ParallelFor3D.
+template< typename Device, typename Real, typename Index >
+class GridTraversersBenchmark< 3, Device, Real, Index >
+{
+   public:
+
+      using Vector = Containers::Vector< Real, Device, Index >;
+
+      // Stores the edge length and constructs the vector with size * size * size.
+      GridTraversersBenchmark( Index size )
+      : size( size ),
+        v( size * size * size )
+      {}
+
+      // Writes outer + middle + inner to out[ ( outer * size + middle ) * size + inner ] via ParallelFor3D.
+      void writeOne()
+      {
+         // Local copy of the member, so the by-value capture holds a plain Index.
+         Index edge = size;
+         auto writeSum = [=] __cuda_callable__ ( Index outer, Index middle, Index inner, Real* out )
+         {
+            out[ ( outer * edge + middle ) * edge + inner ] = outer + middle + inner;
+         };
+
+         ParallelFor3D< Device >::exec( static_cast< Index >( 0 ),
+                                        static_cast< Index >( 0 ),
+                                        static_cast< Index >( 0 ),
+                                        size, size, size,
+                                        writeSum, v.getData() );
+      }
+
+   protected:
+      Index size;   // edge length passed to the constructor
+      Vector v;     // constructed with size * size * size
+};
+
+
+   } // namespace Benchmarks
+} // namespace TNL
+
+
+
diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
index 3e13d52dd934a5b47601aac405245e9525ed2c62..9b69a316397197e2b5aa83b7cc4951b35b39f8d3 100644
--- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
+++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h
@@ -107,6 +107,7 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
    
 
    Benchmark benchmark; //( loops, verbose );
+   benchmark.setup( parameters );
    Benchmark::MetadataMap metadata = getHardwareMetadata();
    runBenchmark< Dimension >( parameters, benchmark, metadata );
    
diff --git a/src/Benchmarks/scripts/run-tnl-benchmark-traversers b/src/Benchmarks/scripts/run-tnl-benchmark-traversers
new file mode 100644
index 0000000000000000000000000000000000000000..00cd1e1ac64f0a9318c7ea749aad7014ce4d8e20
--- /dev/null
+++ b/src/Benchmarks/scripts/run-tnl-benchmark-traversers
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+tnl-benchmark-traversers --dimension 1 --loops 1 --min-size 16 --max-size 100000 --min-time 1
+tnl-benchmark-traversers --dimension 2 --loops 1 --min-size 16 --max-size 10000 --min-time 1 --output-mode append
+tnl-benchmark-traversers --dimension 3 --loops 1 --min-size 16 --max-size 1000 --min-time 1 --output-mode append