From 8ab1f885ccfa07b85b820c16874cf775927ea585 Mon Sep 17 00:00:00 2001
From: Hayeu Yury <hayeuyur@fit.cvut.cz>
Date: Mon, 18 Oct 2021 10:13:11 +0200
Subject: [PATCH 01/29] Add Docker image as development environment

---
 Dockerfile | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 Dockerfile
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..246faf448
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM nvidia/cuda:11.4.2-devel-ubuntu20.04
+
+# Fix timezone
+ENV TZ=Europe/Prague
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+RUN apt-get update
+RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev
+RUN apt-get install -y --no-install-recommends git openssh-client subversion procps
+RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db
+RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget
+
+RUN git config --global alias.ll 'log --oneline --graph --all --decorate'
+RUN git config --global core.editor 'vim'
+
+WORKDIR /workspace
-- 
GitLab


From 27d0c8be4a77f64765fa282062de405b83b6ce6e Mon Sep 17 00:00:00 2001
From: Hayeu Yury <hayeuyur@fit.cvut.cz>
Date: Mon, 18 Oct 2021 13:09:55 +0200
Subject: [PATCH 02/29] Basic implementation of the HeatEquasion for host
 device

---
 src/Benchmarks/CMakeLists.txt                 |  1 +
 .../HeatEquationGrid/CMakeLists.txt           |  1 +
 .../HeatmapParallelFor/CMakeLists.txt         |  1 +
 .../HeatmapParallelFor/main.cpp               | 87 +++++++++++++++++++
 .../HeatmapParallelFor/main.cu                |  0
 .../HeatmapParallelFor/main.h                 | 42 +++++++++
 src/Benchmarks/HeatEquationGrid/README.md     |  2 +
 7 files changed, 134 insertions(+)
 create mode 100644 src/Benchmarks/HeatEquationGrid/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
 create mode 100644 src/Benchmarks/HeatEquationGrid/README.md

diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 0fc8e0f02..6fa019081 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory( HeatEquation )
+add_subdirectory( HeatEquationGrid )
 add_subdirectory( BLAS )
 add_subdirectory( NDArray )
 add_subdirectory( SpMV )
diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
new file mode 100644
index 000000000..867619052
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
@@ -0,0 +1 @@
+ADD_SUBDIRECTORY( HeatmapParallelFor )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
new file mode 100644
index 000000000..35fbf34c9
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
@@ -0,0 +1 @@
+add_executable( benchmark_heat_parallel_for main.h main.cpp )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
new file mode 100644
index 000000000..2f4d61512
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
@@ -0,0 +1,87 @@
+#include "main.h"
+
+TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig()
+{
+  TNL::Config::ConfigDescription config;
+
+  config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
+  config.addEntryEnum<TNL::String>("host");
+
+#ifdef HAVE_CUDA
+  config.addEntryEnum<TNL::String>("cuda");
+#endif
+
+  config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
+  config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
+
+  config.addEntry<double>("domain-x-size", "Domain size along x-axis.", 2.0);
+  config.addEntry<double>("domain-y-size", "Domain size along y-axis.", 2.0);
+
+  config.addEntry<double>("sigma", "Sigma in exponential initial condition.", 2.0);
+
+  config.addEntry<double>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
+  config.addEntry<double>("final-time", "Final time of the simulation.", 1.0);
+  config.addEntry<bool>("verbose", "Verbose mode.", true);
+
+  return config;
+}
+
+HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer& parameters):
+  xSize(parameters.getParameter<int>("grid-x-size")),
+  ySize(parameters.getParameter<int>("grid-y-size")),
+  xDomainSize(parameters.getParameter<double>("domain-x-size")),
+  yDomainSize(parameters.getParameter<double>("domain-y-size")),
+  sigma(parameters.getParameter<double>("sigma")),
+  timeStep(parameters.getParameter<double>("time-step")),
+  finalTime(parameters.getParameter<double>("final-time")),
+  verbose(parameters.getParameter<bool>("verbose")) {}
+
+bool HeatmapSolver::Solve(const HeatmapSolver::Parameters& params, TNL::Timer& timer) const {
+  // This is always an external storage for grid.
+  TNL::Container::Array ux(params.xSize * params.ySize),  // data at step u
+                        aux(params.xSize * params.ySize); // data at step u + 1
+
+  // Invalidate ux/aux
+  ux = 0;
+  aux = 0;
+
+  const double hx = params.xDomainSize / (double)params.xSize;
+  const double hy = params.yDomainSize / (double)params.ySize;
+
+  auto uxView = ux.getView(), auxView = aux.getView();
+
+  timer.reset();
+
+  // TODO: - Initial Condition
+
+
+  auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) {
+    aux[i] = 0;
+    aux[(params.ySize - 1) * params.xSize + i] = 0;
+  };
+
+  auto verticalBoundaryCondition = [=] __cuda_callable__(int i) {
+    aux[j * params.ySize] = 0;
+    aux[j * params.ySize + params.xSize - 1] = 0;
+  };
+
+  auto next = [=] __cuda_callable__(int i, int j) {
+    auto index = j * params.ySize + i;
+
+    aux[index] = (u[c - 1] - 2 * u[c] + u[c + 1]) / hx +
+                 (u[c - params.xSize] - 2 * u[c] + u[c + params.xSize]) / hy;
+  };
+
+  double time = 0;
+
+  while (time < params.finalTime) {
+    // TODO: - Do we really need this
+    TNL::Algorithm::ParallelFor(0, params.xSize, horizontalBoundaryCondition);
+    TNL::Algorithm::ParallelFor(0, params.ySize, verticalBoundaryCondition);
+    TNL::Algorithm::ParallelFor2D(1, 1, params.xSize - 1, params.ySize - 1, next);
+
+    time += params.timeStep;
+  }
+
+  return false;
+}
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
new file mode 100644
index 000000000..8734f4d2a
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -0,0 +1,42 @@
+
+#include <iostream>
+#include <TNL/Config/parseCommandLine.h>
+#include <TNL/Containers/Array.h>
+#include <TNL/Timer.h>
+#include <TNL/Algorithms/ParallelFor.h>
+
+#pragma once
+
+class HeatmapSolver {
+  public:
+    class Parameters {
+      public:
+        const int xSize, ySize;
+        const double xDomainSize, yDomainSize;
+        const double sigma;
+        const double timeStep, finalTime;
+        const bool verbose;
+
+        Parameters(const TNL::Config::ParameterContainer& parameters);
+
+        static TNL::Config::ConfigDescription makeInputConfig();
+      private:
+    };
+
+    bool solve(const Parameters &parameters, TNL::Timer &timer) const;
+
+  private:
+};
+
+int main(int argc, char * argv[]) {
+  TNL::Timer timer;
+  auto config = HeatmapSolver::Parameters::makeInputConfig();
+
+  TNL::Config::ParameterContainer parameters;
+  if (!parseCommandLine(argc, argv, config, parameters))
+    return EXIT_FAILURE;
+
+  auto params = HeatmapSolver::Parameters(parameters);
+
+  return 0;
+}
diff --git a/src/Benchmarks/HeatEquationGrid/README.md b/src/Benchmarks/HeatEquationGrid/README.md
new file mode 100644
index 000000000..e5ba7a298
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/README.md
@@ -0,0 +1,2 @@
+This is a temporary folder for grid refactoring testing.
+
-- 
GitLab


From 8379a52cdc65721e292afe39c8530a9615d484a9 Mon Sep 17 00:00:00 2001
From: Hayeu Yury <hayeuyur@MacBook-Pro-7.local>
Date: Wed, 20 Oct 2021 17:14:02 +0200
Subject: [PATCH 03/29] Add device support to implementation

---
 Dockerfile                                    |   2 +-
 .../HeatmapParallelFor/main.cpp               |  87 +-------
 .../HeatmapParallelFor/main.cu                |   2 +
 .../HeatmapParallelFor/main.h                 | 191 ++++++++++++++++--
 4 files changed, 180 insertions(+), 102 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 246faf448..e71c1e1b6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 RUN apt-get update
 RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev
 RUN apt-get install -y --no-install-recommends git openssh-client subversion procps
-RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db
+RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db gnuplot
 RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget
 
 RUN git config --global alias.ll 'log --oneline --graph --all --decorate'
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
index 2f4d61512..7a7a28956 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
@@ -1,87 +1,2 @@
-#include "main.h"
-
-TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig()
-{
-  TNL::Config::ConfigDescription config;
-
-  config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
-  config.addEntryEnum<TNL::String>("host");
-
-#ifdef HAVE_CUDA
-  config.addEntryEnum<TNL::String>("cuda");
-#endif
-
-  config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
-  config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
-
-  config.addEntry<double>("domain-x-size", "Domain size along x-axis.", 2.0);
-  config.addEntry<double>("domain-y-size", "Domain size along y-axis.", 2.0);
-
-  config.addEntry<double>("sigma", "Sigma in exponential initial condition.", 2.0);
-
-  config.addEntry<double>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
-  config.addEntry<double>("final-time", "Final time of the simulation.", 1.0);
-  config.addEntry<bool>("verbose", "Verbose mode.", true);
-
-  return config;
-}
-
-HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer& parameters):
-  xSize(parameters.getParameter<int>("grid-x-size")),
-  ySize(parameters.getParameter<int>("grid-y-size")),
-  xDomainSize(parameters.getParameter<double>("domain-x-size")),
-  yDomainSize(parameters.getParameter<double>("domain-y-size")),
-  sigma(parameters.getParameter<double>("sigma")),
-  timeStep(parameters.getParameter<double>("time-step")),
-  finalTime(parameters.getParameter<double>("final-time")),
-  verbose(parameters.getParameter<bool>("verbose")) {}
-
-bool HeatmapSolver::Solve(const HeatmapSolver::Parameters& params, TNL::Timer& timer) const {
-  // This is always an external storage for grid.
-  TNL::Container::Array ux(params.xSize * params.ySize),  // data at step u
-                        aux(params.xSize * params.ySize); // data at step u + 1
-
-  // Invalidate ux/aux
-  ux = 0;
-  aux = 0;
-
-  const double hx = params.xDomainSize / (double)params.xSize;
-  const double hy = params.yDomainSize / (double)params.ySize;
 
-  auto uxView = ux.getView(), auxView = aux.getView();
-
-  timer.reset();
-
-  // TODO: - Initial Condition
-
-
-  auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) {
-    aux[i] = 0;
-    aux[(params.ySize - 1) * params.xSize + i] = 0;
-  };
-
-  auto verticalBoundaryCondition = [=] __cuda_callable__(int i) {
-    aux[j * params.ySize] = 0;
-    aux[j * params.ySize + params.xSize - 1] = 0;
-  };
-
-  auto next = [=] __cuda_callable__(int i, int j) {
-    auto index = j * params.ySize + i;
-
-    aux[index] = (u[c - 1] - 2 * u[c] + u[c + 1]) / hx +
-                 (u[c - params.xSize] - 2 * u[c] + u[c + params.xSize]) / hy;
-  };
-
-  double time = 0;
-
-  while (time < params.finalTime) {
-    // TODO: - Do we really need this
-    TNL::Algorithm::ParallelFor(0, params.xSize, horizontalBoundaryCondition);
-    TNL::Algorithm::ParallelFor(0, params.ySize, verticalBoundaryCondition);
-    TNL::Algorithm::ParallelFor2D(1, 1, params.xSize - 1, params.ySize - 1, next);
-
-    time += params.timeStep;
-  }
-
-  return false;
-}
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
index e69de29bb..7a7a28956 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
index 8734f4d2a..5aefc49f4 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -1,42 +1,203 @@
 
-#include <iostream>
-#include <TNL/Config/parseCommandLine.h>
-#include <TNL/Containers/Array.h>
-#include <TNL/Timer.h>
-#include <TNL/Algorithms/ParallelFor.h>
+#include<iostream>
+#include<fstream>
+#include<TNL/Config/parseCommandLine.h>
+#include<TNL/Containers/Array.h>
+#include<TNL/Algorithms/ParallelFor.h>
 
 #pragma once
 
+template<typename Real>
 class HeatmapSolver {
   public:
     class Parameters {
       public:
         const int xSize, ySize;
-        const double xDomainSize, yDomainSize;
-        const double sigma;
-        const double timeStep, finalTime;
+        const Real xDomainSize, yDomainSize;
+        const Real sigma;
+        const Real timeStep, finalTime;
         const bool verbose;
 
-        Parameters(const TNL::Config::ParameterContainer& parameters);
+        Parameters(const TNL::Config::ParameterContainer &parameters);
 
         static TNL::Config::ConfigDescription makeInputConfig();
-      private:
     };
 
-    bool solve(const Parameters &parameters, TNL::Timer &timer) const;
+    template <typename Device>
+    bool solve(const Parameters &parameters) const;
 
   private:
+    template <typename Device>
+    bool writeGNUPlot(const std::string &filename,
+                      const Parameters& parameters,
+                      const TNL::Containers::Array<Real, Device>& map) const;
 };
 
+template <typename Real>
+TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig()
+{
+  TNL::Config::ConfigDescription config;
+
+  config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
+  config.addEntryEnum<TNL::String>("host");
+
+#ifdef HAVE_CUDA
+  config.addEntryEnum<TNL::String>("cuda");
+#endif
+
+  config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
+  config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
+
+  config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
+  config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
+
+  config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 2.0);
+
+  config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
+  config.addEntry<Real>("final-time", "Final time of the simulation.", 1.0);
+  config.addEntry<bool>("verbose", "Verbose mode.", true);
+
+  return config;
+}
+
+template<typename Real>
+HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContainer &parameters) : xSize(parameters.getParameter<int>("grid-x-size")),
+                                                                                                 ySize(parameters.getParameter<int>("grid-y-size")),
+                                                                                                 xDomainSize(parameters.getParameter<Real>("domain-x-size")),
+                                                                                                 yDomainSize(parameters.getParameter<Real>("domain-y-size")),
+                                                                                                 sigma(parameters.getParameter<Real>("sigma")),
+                                                                                                 timeStep(parameters.getParameter<Real>("time-step")),
+                                                                                                 finalTime(parameters.getParameter<Real>("final-time")),
+                                                                                                 verbose(parameters.getParameter<bool>("verbose")) {}
+
+/***
+ *
+ * ySize|j                                          (ySize - 1) * xSize + xSize - 1
+ *  |------------------------------------------------------
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |
+ *  |------------------------------------------------------>
+ *
+ *  0                                                xSize|i
+ *
+ *  j * xSize + i
+ ***/
+template<typename Real>
+template<typename Device>
+bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const
+{
+  // This is always an external storage for grid.
+  TNL::Containers::Array<Real, Device> ux(params.xSize * params.ySize),  // data at step u
+                                       aux(params.xSize * params.ySize); // data at step u + 1
+
+  // Invalidate ux/aux
+  ux = 0;
+  aux = 0;
+
+  const Real hx = params.xDomainSize / (Real)params.xSize;
+  const Real hy = params.yDomainSize / (Real)params.ySize;
+  const Real hx_inv = 1 / (hx * hx);
+  const Real hy_inv = 1 / (hy * hy);
+
+  auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+
+  auto uxView = ux.getView(),
+       auxView = aux.getView();
+
+  auto init = [=] __cuda_callable__(int i, int j) mutable {
+    auto index = j * params.xSize + i;
+
+    uxView[index] = exp(-params.sigma * (i * i + j * j));
+  };
+
+  TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, init);
+
+  if (!writeGNUPlot("data.txt", params, ux))
+    return false;
+
+  // auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) mutable {
+  //   auxView[i] = 0;
+  //   auxView[(params.ySize - 1) * params.xSize + i] = 0;
+  // };
+
+  // auto verticalBoundaryCondition = [=] __cuda_callable__(int j) mutable {
+  //   auxView[j * params.xSize] = 0;
+  //   auxView[j * params.xSize + params.xSize - 1] = 0;
+  // };
+
+  // TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize, horizontalBoundaryCondition);
+  // TNL::Algorithms::ParallelFor<Device>::exec(0, params.ySize, verticalBoundaryCondition);
+
+  auto next = [=] __cuda_callable__(int i, int j) mutable {
+    auto index = j * params.ySize + i;
+
+    auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                     (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv;
+  };
+
+  auto update = [=] __cuda_callable__(int i) mutable {
+    uxView[i] += auxView[i] * timestep;
+  };
+
+  Real start = 0;
+
+  while (start < params.finalTime)
+  {
+    TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, next);
+    TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize * params.ySize, update);
+
+    std::cout << "Iteration: " << start << std::endl;
+    start += timestep;
+  }
+
+  return writeGNUPlot("data_final.txt", params, ux);
+}
+
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::writeGNUPlot(const std::string &filename,
+                                       const HeatmapSolver<Real>::Parameters &params,
+                                       const TNL::Containers::Array<Real, Device> &map) const {
+  std::ofstream out(filename, std::ios::out);
+
+  if (!out.is_open())
+    return false;
+
+  for (int j = 0; j < params.ySize; j++)
+    for (int i = 0; i < params.xSize; i++)
+      out << i << " " << j << " " << map[j * params.xSize + i] << std::endl;
+
+  return out.good();
+}
+
 int main(int argc, char * argv[]) {
-  TNL::Timer timer;
-  auto config = HeatmapSolver::Parameters::makeInputConfig();
+  using Real = double;
+
+  auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
 
   TNL::Config::ParameterContainer parameters;
   if (!parseCommandLine(argc, argv, config, parameters))
     return EXIT_FAILURE;
 
-  auto params = HeatmapSolver::Parameters(parameters);
+  auto device = parameters.getParameter<TNL::String>("device");
+  auto params = HeatmapSolver<Real>::Parameters(parameters);
+
+  HeatmapSolver<Real> solver;
+
+  if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+    return EXIT_FAILURE;
+
+#ifdef HAVE_CUDA
+  if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+    return EXIT_FAILURE;
+#endif
 
-  return 0;
+  return EXIT_SUCCESS;
 }
-- 
GitLab


From a3d3f84f433b774b5792e171335d33d48fc2a43a Mon Sep 17 00:00:00 2001
From: Hayeu Yury <hayeuyur@macbook-pro.home>
Date: Wed, 20 Oct 2021 21:56:56 +0200
Subject: [PATCH 04/29] Implement fixes to the initial condition

---
 .../HeatmapParallelFor/CMakeLists.txt         |  6 ++++-
 .../HeatmapParallelFor/main.h                 | 27 ++++++-------------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
index 35fbf34c9..80f43845f 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
@@ -1 +1,5 @@
-add_executable( benchmark_heat_parallel_for main.h main.cpp )
+if (BUILD_CUDA)
+  CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu )
+ELSE()
+  add_executable( benchmark_heat_parallel_for main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
index 5aefc49f4..bc6e47abc 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -51,10 +51,10 @@ TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig(
   config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
   config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
 
-  config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 2.0);
+  config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 1.0);
 
   config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
-  config.addEntry<Real>("final-time", "Final time of the simulation.", 1.0);
+  config.addEntry<Real>("final-time", "Final time of the simulation.", 0.012);
   config.addEntry<bool>("verbose", "Verbose mode.", true);
 
   return config;
@@ -71,6 +71,7 @@ HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContaine
                                                                                                  verbose(parameters.getParameter<bool>("verbose")) {}
 
 /***
+ * Grid parameters:
  *
  * ySize|j                                          (ySize - 1) * xSize + xSize - 1
  *  |------------------------------------------------------
@@ -108,13 +109,15 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
 
-  auto uxView = ux.getView(),
-       auxView = aux.getView();
+  auto uxView = ux.getView(), auxView = aux.getView();
 
   auto init = [=] __cuda_callable__(int i, int j) mutable {
     auto index = j * params.xSize + i;
 
-    uxView[index] = exp(-params.sigma * (i * i + j * j));
+    auto x = i * hx - params.xDomainSize / 2;
+    auto y = j * hy - params.yDomainSize / 2;
+
+    uxView[index] = exp(params.sigma * (x * x + y * y));
   };
 
   TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, init);
@@ -122,19 +125,6 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
   if (!writeGNUPlot("data.txt", params, ux))
     return false;
 
-  // auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) mutable {
-  //   auxView[i] = 0;
-  //   auxView[(params.ySize - 1) * params.xSize + i] = 0;
-  // };
-
-  // auto verticalBoundaryCondition = [=] __cuda_callable__(int j) mutable {
-  //   auxView[j * params.xSize] = 0;
-  //   auxView[j * params.xSize + params.xSize - 1] = 0;
-  // };
-
-  // TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize, horizontalBoundaryCondition);
-  // TNL::Algorithms::ParallelFor<Device>::exec(0, params.ySize, verticalBoundaryCondition);
-
   auto next = [=] __cuda_callable__(int i, int j) mutable {
     auto index = j * params.ySize + i;
 
@@ -153,7 +143,6 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
     TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, next);
     TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize * params.ySize, update);
 
-    std::cout << "Iteration: " << start << std::endl;
     start += timestep;
   }
 
-- 
GitLab


From 5b772f3cd885760f9a06df29f8a1d7d80c60b7b7 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 21 Oct 2021 08:32:06 +0200
Subject: [PATCH 05/29] Fix code alignment. Remove Dockerfile

---
 Dockerfile                                    |  16 --
 .../HeatmapParallelFor/CMakeLists.txt         |   4 +-
 .../HeatmapParallelFor/main.h                 | 219 +++++++++---------
 3 files changed, 115 insertions(+), 124 deletions(-)
 delete mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index e71c1e1b6..000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM nvidia/cuda:11.4.2-devel-ubuntu20.04
-
-# Fix timezone
-ENV TZ=Europe/Prague
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-
-RUN apt-get update
-RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev
-RUN apt-get install -y --no-install-recommends git openssh-client subversion procps
-RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db gnuplot
-RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget
-
-RUN git config --global alias.ll 'log --oneline --graph --all --decorate'
-RUN git config --global core.editor 'vim'
-
-WORKDIR /workspace
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
index 80f43845f..054a28cf8 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-  CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu )
+   CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu )
 ELSE()
-  add_executable( benchmark_heat_parallel_for main.h main.cpp )
+   add_executable( benchmark_heat_parallel_for main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
index bc6e47abc..311fc94e5 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -1,66 +1,68 @@
 
-#include<iostream>
-#include<fstream>
-#include<TNL/Config/parseCommandLine.h>
-#include<TNL/Containers/Array.h>
-#include<TNL/Algorithms/ParallelFor.h>
+#include <iostream>
+#include <fstream>
+#include <TNL/Config/parseCommandLine.h>
+#include <TNL/Containers/Array.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 #pragma once
 
-template<typename Real>
-class HeatmapSolver {
-  public:
-    class Parameters {
-      public:
-        const int xSize, ySize;
-        const Real xDomainSize, yDomainSize;
-        const Real sigma;
-        const Real timeStep, finalTime;
-        const bool verbose;
-
-        Parameters(const TNL::Config::ParameterContainer &parameters);
-
-        static TNL::Config::ConfigDescription makeInputConfig();
-    };
-
-    template <typename Device>
-    bool solve(const Parameters &parameters) const;
-
-  private:
-    template <typename Device>
-    bool writeGNUPlot(const std::string &filename,
-                      const Parameters& parameters,
-                      const TNL::Containers::Array<Real, Device>& map) const;
+template <typename Real>
+class HeatmapSolver
+{
+public:
+   class Parameters
+   {
+   public:
+      const int xSize, ySize;
+      const Real xDomainSize, yDomainSize;
+      const Real sigma;
+      const Real timeStep, finalTime;
+      const bool verbose;
+
+      Parameters(const TNL::Config::ParameterContainer &parameters);
+
+      static TNL::Config::ConfigDescription makeInputConfig();
+   };
+
+   template <typename Device>
+   bool solve(const Parameters &parameters) const;
+
+private:
+   template <typename Device>
+   bool writeGNUPlot(const std::string &filename,
+                     const Parameters &parameters,
+                     const TNL::Containers::Array<Real, Device> &map) const;
 };
 
 template <typename Real>
 TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig()
 {
-  TNL::Config::ConfigDescription config;
+   TNL::Config::ConfigDescription config;
 
-  config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
-  config.addEntryEnum<TNL::String>("host");
+   config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
+   config.addEntryEnum<TNL::String>("host");
 
 #ifdef HAVE_CUDA
-  config.addEntryEnum<TNL::String>("cuda");
+   config.addEntryEnum<TNL::String>("cuda");
 #endif
 
-  config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
-  config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
+   config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
+   config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
 
-  config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
-  config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
+   config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
+   config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
 
-  config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 1.0);
+   config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 1.0);
 
-  config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
-  config.addEntry<Real>("final-time", "Final time of the simulation.", 0.012);
-  config.addEntry<bool>("verbose", "Verbose mode.", true);
+   config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
+   config.addEntry<Real>("final-time", "Final time of the simulation.", 0.012);
+   config.addEntry<bool>("verbose", "Verbose mode.", true);
 
-  return config;
+   return config;
 }
 
-template<typename Real>
+template <typename Real>
 HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContainer &parameters) : xSize(parameters.getParameter<int>("grid-x-size")),
                                                                                                  ySize(parameters.getParameter<int>("grid-y-size")),
                                                                                                  xDomainSize(parameters.getParameter<Real>("domain-x-size")),
@@ -73,7 +75,7 @@ HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContaine
 /***
  * Grid parameters:
  *
- * ySize|j                                          (ySize - 1) * xSize + xSize - 1
+ * ySize-1|j                                (ySize - 1) * xSize + xSize - 1
  *  |------------------------------------------------------
  *  |
  *  |
@@ -86,107 +88,112 @@ HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContaine
  *  |
  *  |------------------------------------------------------>
  *
- *  0                                                xSize|i
+ *  0                                                xSize-1|i
  *
  *  j * xSize + i
  ***/
-template<typename Real>
-template<typename Device>
+template <typename Real>
+template <typename Device>
 bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const
 {
-  // This is always an external storage for grid.
-  TNL::Containers::Array<Real, Device> ux(params.xSize * params.ySize),  // data at step u
-                                       aux(params.xSize * params.ySize); // data at step u + 1
+   // This is always an external storage for grid.
+   TNL::Containers::Array<Real, Device> ux(params.xSize * params.ySize), // data at step u
+                                        aux(params.xSize * params.ySize);// data at step u + 1
 
-  // Invalidate ux/aux
-  ux = 0;
-  aux = 0;
+   // Invalidate ux/aux
+   ux = 0;
+   aux = 0;
 
-  const Real hx = params.xDomainSize / (Real)params.xSize;
-  const Real hy = params.yDomainSize / (Real)params.ySize;
-  const Real hx_inv = 1 / (hx * hx);
-  const Real hy_inv = 1 / (hy * hy);
+   const Real hx = params.xDomainSize / (Real)params.xSize;
+   const Real hy = params.yDomainSize / (Real)params.ySize;
+   const Real hx_inv = 1 / (hx * hx);
+   const Real hy_inv = 1 / (hy * hy);
 
-  auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
 
-  auto uxView = ux.getView(), auxView = aux.getView();
+   auto uxView = ux.getView(), auxView = aux.getView();
 
-  auto init = [=] __cuda_callable__(int i, int j) mutable {
-    auto index = j * params.xSize + i;
+   auto init = [=] __cuda_callable__(int i, int j) mutable
+   {
+      auto index = j * params.xSize + i;
 
-    auto x = i * hx - params.xDomainSize / 2;
-    auto y = j * hy - params.yDomainSize / 2;
+      auto x = i * hx - params.xDomainSize / 2;
+      auto y = j * hy - params.yDomainSize / 2;
 
-    uxView[index] = exp(params.sigma * (x * x + y * y));
-  };
+      uxView[index] = exp(params.sigma * (x * x + y * y));
+   };
 
-  TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, init);
+   TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, init);
 
-  if (!writeGNUPlot("data.txt", params, ux))
-    return false;
+   if (!writeGNUPlot("data.txt", params, ux))
+      return false;
 
-  auto next = [=] __cuda_callable__(int i, int j) mutable {
-    auto index = j * params.ySize + i;
+   auto next = [=] __cuda_callable__(int i, int j) mutable
+   {
+      auto index = j * params.ySize + i;
 
-    auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                     (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv;
-  };
+      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                       (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv;
+   };
 
-  auto update = [=] __cuda_callable__(int i) mutable {
-    uxView[i] += auxView[i] * timestep;
-  };
+   auto update = [=] __cuda_callable__(int i) mutable
+   {
+      uxView[i] += auxView[i] * timestep;
+   };
 
-  Real start = 0;
+   Real start = 0;
 
-  while (start < params.finalTime)
-  {
-    TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, next);
-    TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize * params.ySize, update);
+   while (start < params.finalTime)
+   {
+      TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, next);
+      TNL::Algorithms::ParallelFor<Device>::exec(0, params.xSize * params.ySize, update);
 
-    start += timestep;
-  }
+      start += timestep;
+   }
 
-  return writeGNUPlot("data_final.txt", params, ux);
+   return writeGNUPlot("data_final.txt", params, ux);
 }
 
 template <typename Real>
 template <typename Device>
 bool HeatmapSolver<Real>::writeGNUPlot(const std::string &filename,
                                        const HeatmapSolver<Real>::Parameters &params,
-                                       const TNL::Containers::Array<Real, Device> &map) const {
-  std::ofstream out(filename, std::ios::out);
+                                       const TNL::Containers::Array<Real, Device> &map) const
+{
+   std::ofstream out(filename, std::ios::out);
 
-  if (!out.is_open())
-    return false;
+   if (!out.is_open())
+      return false;
 
-  for (int j = 0; j < params.ySize; j++)
-    for (int i = 0; i < params.xSize; i++)
-      out << i << " " << j << " " << map[j * params.xSize + i] << std::endl;
+   for (int j = 0; j < params.ySize; j++)
+      for (int i = 0; i < params.xSize; i++)
+         out << i << " " << j << " " << map[j * params.xSize + i] << std::endl;
 
-  return out.good();
+   return out.good();
 }
 
-int main(int argc, char * argv[]) {
-  using Real = double;
+int main(int argc, char *argv[])
+{
+   using Real = double;
 
-  auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
+   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
 
-  TNL::Config::ParameterContainer parameters;
-  if (!parseCommandLine(argc, argv, config, parameters))
-    return EXIT_FAILURE;
+   TNL::Config::ParameterContainer parameters;
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
 
-  auto device = parameters.getParameter<TNL::String>("device");
-  auto params = HeatmapSolver<Real>::Parameters(parameters);
+   auto device = parameters.getParameter<TNL::String>("device");
+   auto params = HeatmapSolver<Real>::Parameters(parameters);
 
-  HeatmapSolver<Real> solver;
+   HeatmapSolver<Real> solver;
 
-  if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
-    return EXIT_FAILURE;
+   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+      return EXIT_FAILURE;
 
 #ifdef HAVE_CUDA
-  if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
-    return EXIT_FAILURE;
+   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+      return EXIT_FAILURE;
 #endif
 
-  return EXIT_SUCCESS;
+   return EXIT_SUCCESS;
 }
-- 
GitLab


From c2a06a20975193327281fcdf7c58398135d937b8 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Tue, 2 Nov 2021 18:33:13 +0100
Subject: [PATCH 06/29] Implement dimension set/get

---
 src/Benchmarks/CMakeLists.txt                 | 21 ++++-
 .../HeatEquationGrid/CMakeLists.txt           |  3 +-
 .../HeatmapGrid/CMakeLists.txt                |  5 ++
 .../HeatEquationGrid/HeatmapGrid/main.cpp     |  2 +
 .../HeatEquationGrid/HeatmapGrid/main.cu      |  2 +
 .../HeatEquationGrid/HeatmapGrid/main.h       | 78 +++++++++++++++++++
 6 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h

diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 6fa019081..4ef9404fc 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -1,5 +1,6 @@
-add_subdirectory( HeatEquation )
+#add_subdirectory( HeatEquation )
 add_subdirectory( HeatEquationGrid )
+<<<<<<< HEAD
 add_subdirectory( BLAS )
 add_subdirectory( NDArray )
 add_subdirectory( SpMV )
@@ -8,3 +9,21 @@ add_subdirectory( LinearSolvers )
 add_subdirectory( ODESolvers )
 add_subdirectory( Sorting )
 add_subdirectory( Traversers )
+=======
+#add_subdirectory( BLAS )
+#add_subdirectory( NDArray )
+#add_subdirectory( SpMV )
+#add_subdirectory( DistSpMV )
+#add_subdirectory( LinearSolvers )
+#add_subdirectory( ODESolvers )
+#add_subdirectory( Sorting )
+#add_subdirectory( Traversers )
+
+set( headers
+         Benchmarks.h
+         FunctionTimer.h
+         Logging.h
+)
+
+install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
+>>>>>>> 049219f08 (Implement dimension set/get)
diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
index 867619052..d7cbc2027 100644
--- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
@@ -1 +1,2 @@
-ADD_SUBDIRECTORY( HeatmapParallelFor )
+#ADD_SUBDIRECTORY( HeatmapParallelFor )
+ADD_SUBDIRECTORY( HeatmapGrid )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
new file mode 100644
index 000000000..a4c44b591
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
@@ -0,0 +1,5 @@
+#if (BUILD_CUDA)
+#   CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu )
+#ELSE()
+   add_executable( benchmark_heat_grid main.h main.cpp )
+#ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
new file mode 100644
index 000000000..6c910144c
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -0,0 +1,78 @@
+
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <type_traits>
+
+#include <TNL/Config/parseCommandLine.h>
+#include <TNL/Devices/Host.h>
+
+
+// Utils
+
+// Thanks for implementation idea https://www.fluentcpp.com/2021/04/30/how-to-implement-stdconjunction-and-stddisjunction-in-c11/
+// TODO: Implicit bool looks strange because, then I need to write ::value everywhere...
+
+template<bool ...> struct bool_pack {};
+
+template <bool... Bs>
+using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>;
+
+// Any N dimensional grid:
+//   1. Should provide local|remote step
+//   2. Should know its own limits. Count of the elements at each dimension
+
+template<int Dimension,
+         typename Index,
+         typename Device = TNL::Devices::Host>
+class Grid {
+   public:
+      Grid() {}
+      ~Grid() {}
+
+      template <typename... Dimensions,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, Dimensions>::value...>::value>,
+                typename = std::enable_if_t<sizeof...(Dimensions) == Dimension>>
+      void setDimensions(Dimensions... dimensions) noexcept {
+         Index i = 0;
+
+         for (auto x: { dimensions... }) {
+            this -> dimensions[i] = x;
+            i++;
+         }
+      }
+
+      template <typename... DimensionIndex,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
+      std::vector<Index> getDimensions(DimensionIndex... indicies) const noexcept {
+         std::vector<Index> index{ indicies... }, result;
+
+         std::transform(index.begin(), index.end(), std::back_inserter(result), [this](const Index& index) {
+            return this -> dimensions[index];
+         });
+
+         return result;
+      }
+
+   private:
+      Index dimensions[Dimension];
+};
+
+int main(int argc, char *argv[]) {
+   Grid<4, int> grid;
+
+   grid.setDimensions(1, 2, 3, 4);
+
+   auto dimensions = grid.getDimensions(0, 1, 2);
+
+   std::cout << dimensions[0] << dimensions[2] << dimensions[1] << std::endl;
+
+
+   // const int dimensions = 4;
+
+   // int sizes[dimensions] = { 3, 3, 3, 3 };
+   // int limits[dimensions] = { 0 };
+
+
+   return 0;
+}
-- 
GitLab


From 0cbf5fb41c8ec2db67c77275dd4bdfb9e3d7ccf6 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 4 Nov 2021 14:22:47 +0100
Subject: [PATCH 07/29] Implement n-dimensional grid maximal indexes

---
 .../HeatEquationGrid/HeatmapGrid/main.h       | 87 +++++++++++++++----
 1 file changed, 70 insertions(+), 17 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 6c910144c..5a1f736ef 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -3,6 +3,7 @@
 #include <fstream>
 #include <numeric>
 #include <type_traits>
+#include <array>
 
 #include <TNL/Config/parseCommandLine.h>
 #include <TNL/Devices/Host.h>
@@ -10,9 +11,6 @@
 
 // Utils
 
-// Thanks for implementation idea https://www.fluentcpp.com/2021/04/30/how-to-implement-stdconjunction-and-stddisjunction-in-c11/
-// TODO: Implicit bool looks strange because, then I need to write ::value everywhere...
-
 template<bool ...> struct bool_pack {};
 
 template <bool... Bs>
@@ -24,7 +22,8 @@ using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>
 
 template<int Dimension,
          typename Index,
-         typename Device = TNL::Devices::Host>
+         typename Device = TNL::Devices::Host,
+         typename = std::enable_if_t<(Dimension > 0)>>
 class Grid {
    public:
       Grid() {}
@@ -40,8 +39,11 @@ class Grid {
             this -> dimensions[i] = x;
             i++;
          }
+
+         refreshDimensionMaps();
       }
 
+      // TODO: - Add __cuda_callable__
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
       std::vector<Index> getDimensions(DimensionIndex... indicies) const noexcept {
@@ -54,25 +56,76 @@ class Grid {
          return result;
       }
 
-   private:
-      Index dimensions[Dimension];
-};
-
-int main(int argc, char *argv[]) {
-   Grid<4, int> grid;
-
-   grid.setDimensions(1, 2, 3, 4);
+      // TODO: - Implement typechecking for function
+      template <typename Function,
+                typename... FunctionArgs>
+      void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept {
 
-   auto dimensions = grid.getDimensions(0, 1, 2);
+      }
+   private:
+      std::array<Index, Dimension> dimensions;
+      /**
+       * @brief - A dimension map is a store for dimension limits over all combinations of basis.
+       *          First, (n choose 0) elements will contain the count of 0 dimension elements
+       *          Second, (n choose 1) elements will contain the count of 1-dimension elements
+       *          ....
+       *
+       *          For example, let's have a 3-d grid, then the map indexing will be the next:
+       *            0 - 0 - count of vertices
+       *            1, 2, 3 - count of edges in x, y, z plane
+       *            4, 5, 6 - count of faces in xy, xz, yz plane
+       *            7 - count of cells in xyz plane
+       *
+       * @warning - The ordering of is lexigraphical.
+       */
+      std::array<Index, 1 << Dimension> dimensionMap;
+      /**
+       * @brief - A cumulative map over dimensions.
+       */
+      std::array<Index, Dimension + 1> cumulativeDimensionMap;
+
+      /**
+       * @brief - Fills dimensions map for N-dimensional Grid.
+       *
+       * @complexity - O(2 ^ Dimension)
+       */
+      void refreshDimensionMaps() noexcept {
+         std::array<bool, Dimension> combinationBuffer = {};
+         std::size_t j = 0;
+
+         std::fill(cumulativeDimensionMap.begin(), cumulativeDimensionMap.end(), 0);
+
+         for (std::size_t i = 0; i <= Dimension; i++) {
+            std::fill(combinationBuffer.begin(), combinationBuffer.end(), false);
+            std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true);
+
+            do {
+               int result = 1;
+
+               for (std::size_t i = 0; i < combinationBuffer.size(); i++)
+                  result *= combinationBuffer[i] ? dimensions[i] : dimensions[i] + 1;
+
+               dimensionMap[j] = result;
+               cumulativeDimensionMap[i] += result;
+
+               j++;
+            } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end()));
+         }
 
-   std::cout << dimensions[0] << dimensions[2] << dimensions[1] << std::endl;
+         for (int i = 0; i < 1 << Dimension; i++) {
+            std::cout << dimensionMap[i] << " ";
+         }
 
+         std::cout << std::endl;
+      }
+};
 
-   // const int dimensions = 4;
+int main(int argc, char *argv[]) {
+   Grid<3, int> grid;
 
-   // int sizes[dimensions] = { 3, 3, 3, 3 };
-   // int limits[dimensions] = { 0 };
+   grid.setDimensions(1, 2, 3);
 
+  // auto dimensions = grid.getDimensions(0, 1, 2);
 
    return 0;
 }
-- 
GitLab


From 96d7804df065bec9c1d206379a772b14bd19a8b0 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 4 Nov 2021 15:16:20 +0100
Subject: [PATCH 08/29] Move out separate heatmap solver implementation.

---
 .../HeatEquationGrid/Base/HeatmapSolver.h     | 94 +++++++++++++++++++
 .../HeatEquationGrid/CMakeLists.txt           |  2 +-
 .../HeatEquationGrid/HeatmapGrid/main.h       | 87 ++++++++++++-----
 .../HeatmapParallelFor/CMakeLists.txt         |  4 +-
 .../HeatmapParallelFor/main.h                 | 89 +-----------------
 5 files changed, 166 insertions(+), 110 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h

diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
new file mode 100644
index 000000000..92a2b4611
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
@@ -0,0 +1,94 @@
+
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <type_traits>
+#include <array>
+
+#include <TNL/Config/parseCommandLine.h>
+#include <TNL/Containers/Array.h>
+
+template <typename Real>
+class HeatmapSolver
+{
+public:
+   class Parameters
+   {
+   public:
+      const int xSize, ySize;
+      const Real xDomainSize, yDomainSize;
+      const Real sigma;
+      const Real timeStep, finalTime;
+      const bool verbose;
+
+      Parameters(const TNL::Config::ParameterContainer &parameters);
+
+      static TNL::Config::ConfigDescription makeInputConfig();
+   };
+
+   template <typename Device>
+   bool solve(const Parameters &parameters) const;
+
+private:
+   template <typename Device>
+   bool writeGNUPlot(const std::string &filename,
+                     const Parameters &parameters,
+                     const TNL::Containers::Array<Real, Device> &map) const;
+};
+
+template <typename Real>
+TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig()
+{
+   TNL::Config::ConfigDescription config;
+
+   config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
+   config.addEntryEnum<TNL::String>("host");
+
+#ifdef HAVE_CUDA
+   config.addEntryEnum<TNL::String>("cuda");
+#endif
+
+   config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
+   config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
+
+   config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
+   config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
+
+   config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 1.0);
+
+   config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
+   config.addEntry<Real>("final-time", "Final time of the simulation.", 0.012);
+   config.addEntry<bool>("verbose", "Verbose mode.", true);
+
+   return config;
+}
+
+template <typename Real>
+HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContainer &parameters) : xSize(parameters.getParameter<int>("grid-x-size")),
+                                                                                                 ySize(parameters.getParameter<int>("grid-y-size")),
+                                                                                                 xDomainSize(parameters.getParameter<Real>("domain-x-size")),
+                                                                                                 yDomainSize(parameters.getParameter<Real>("domain-y-size")),
+                                                                                                 sigma(parameters.getParameter<Real>("sigma")),
+                                                                                                 timeStep(parameters.getParameter<Real>("time-step")),
+                                                                                                 finalTime(parameters.getParameter<Real>("final-time")),
+                                                                                                 verbose(parameters.getParameter<bool>("verbose")) {}
+
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::writeGNUPlot(const std::string &filename,
+                                       const HeatmapSolver<Real>::Parameters &params,
+                                       const TNL::Containers::Array<Real, Device> &map) const
+{
+   std::ofstream out(filename, std::ios::out);
+
+   if (!out.is_open())
+      return false;
+
+   for (int j = 0; j < params.ySize; j++)
+      for (int i = 0; i < params.xSize; i++)
+         out << i << " " << j << " " << map[j * params.xSize + i] << std::endl;
+
+   return out.good();
+}
diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
index d7cbc2027..9adc7520d 100644
--- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
@@ -1,2 +1,2 @@
-#ADD_SUBDIRECTORY( HeatmapParallelFor )
+ADD_SUBDIRECTORY( HeatmapParallelFor )
 ADD_SUBDIRECTORY( HeatmapGrid )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 5a1f736ef..5f5e2cb4e 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -5,21 +5,20 @@
 #include <type_traits>
 #include <array>
 
+#include "../Base/HeatmapSolver.h"
+
 #include <TNL/Config/parseCommandLine.h>
 #include <TNL/Devices/Host.h>
+#include <TNL/Containers/Array.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
-
-// Utils
+#pragma once
 
 template<bool ...> struct bool_pack {};
 
 template <bool... Bs>
 using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>;
 
-// Any N dimensional grid:
-//   1. Should provide local|remote step
-//   2. Should know its own limits. Count of the elements at each dimension
-
 template<int Dimension,
          typename Index,
          typename Device = TNL::Devices::Host,
@@ -29,6 +28,10 @@ class Grid {
       Grid() {}
       ~Grid() {}
 
+      /**
+       *  @brief - Specifies dimensions of the grid
+       *  @param[in] dimensions - A parameter pack, which specifies edges count in the specific dimension
+       */
       template <typename... Dimensions,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, Dimensions>::value...>::value>,
                 typename = std::enable_if_t<sizeof...(Dimensions) == Dimension>>
@@ -42,24 +45,54 @@ class Grid {
 
          refreshDimensionMaps();
       }
-
-      // TODO: - Add __cuda_callable__
+      /**
+       * @param[in] indicies - A dimension index pack
+       *
+       * - TODO: - Add __cuda_callable__
+       */
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
       std::vector<Index> getDimensions(DimensionIndex... indicies) const noexcept {
-         std::vector<Index> index{ indicies... }, result;
+         std::vector<Index> dimensionIndicies{ indicies... }, result;
+
+         std::transform(dimensionIndicies.begin(),
+                        dimensionIndicies.end(),
+                        std::back_inserter(result), [this](const Index& index) {
+            TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+            TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
 
-         std::transform(index.begin(), index.end(), std::back_inserter(result), [this](const Index& index) {
             return this -> dimensions[index];
          });
 
          return result;
       }
+      /**
+       * @brief - Returns the number of entities of specific dimension
+       */
+      template <typename... DimensionIndex,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
+      std::vector<Index> getEntitiesCount(const Index dimension...) const noexcept
+      {
+         std::vector<Index> dimensionIndicies{indicies...}, result;
+
+         std::transform(dimensionIndicies.begin(),
+                        dimensionIndicies.end(),
+                        std::back_inserter(result), [this](const Index &index) {
+            TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+            TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
+
+            return this->cumulativeDimensionMap[index];
+         });
+
+         return result;
+      }
+      /**
+       * @brief - Traversers a specified dimension in parallel.
+       */
+      template <typename Function>
+      void traverse(const Index dimension, Function function) const noexcept {
+         auto entitiesCount = getEntitiesCount(dimension)[0];
 
-      // TODO: - Implement typechecking for function
-      template <typename Function,
-                typename... FunctionArgs>
-      void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept {
 
       }
    private:
@@ -83,7 +116,6 @@ class Grid {
        * @brief - A cumulative map over dimensions.
        */
       std::array<Index, Dimension + 1> cumulativeDimensionMap;
-
       /**
        * @brief - Fills dimensions map for N-dimensional Grid.
        *
@@ -111,13 +143,26 @@ class Grid {
                j++;
             } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end()));
          }
+      }
+};
+
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const {
+   Grid<2, int, Device> grid;
+
+   grid.setDimensions(params.xSize, params.ySize);
+
+   auto entitiesCount = grid.getEntitiesCount(2)[0];
+
+   TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
+                                        aux(entitiesCount);// data at step u + 1
+
+   // Invalidate ux/aux
+   ux = 0;
+   aux = 0;
 
-         for (int i = 0; i < 1 << Dimension; i++) {
-            std::cout << dimensionMap[i] << " ";
-         }
 
-         std::cout << std::endl;
-      }
 };
 
 int main(int argc, char *argv[]) {
@@ -125,7 +170,7 @@ int main(int argc, char *argv[]) {
 
    grid.setDimensions(1, 2, 3);
 
-  // auto dimensions = grid.getDimensions(0, 1, 2);
+
 
    return 0;
 }
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
index 054a28cf8..51ee69d15 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu )
+   CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cu )
 ELSE()
-   add_executable( benchmark_heat_parallel_for main.h main.cpp )
+   add_executable( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
index 311fc94e5..4e84e0d8d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -5,72 +5,9 @@
 #include <TNL/Containers/Array.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
-#pragma once
-
-template <typename Real>
-class HeatmapSolver
-{
-public:
-   class Parameters
-   {
-   public:
-      const int xSize, ySize;
-      const Real xDomainSize, yDomainSize;
-      const Real sigma;
-      const Real timeStep, finalTime;
-      const bool verbose;
-
-      Parameters(const TNL::Config::ParameterContainer &parameters);
-
-      static TNL::Config::ConfigDescription makeInputConfig();
-   };
-
-   template <typename Device>
-   bool solve(const Parameters &parameters) const;
-
-private:
-   template <typename Device>
-   bool writeGNUPlot(const std::string &filename,
-                     const Parameters &parameters,
-                     const TNL::Containers::Array<Real, Device> &map) const;
-};
-
-template <typename Real>
-TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig()
-{
-   TNL::Config::ConfigDescription config;
-
-   config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
-   config.addEntryEnum<TNL::String>("host");
-
-#ifdef HAVE_CUDA
-   config.addEntryEnum<TNL::String>("cuda");
-#endif
-
-   config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
-   config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
+#include "../Base/HeatmapSolver.h"
 
-   config.addEntry<Real>("domain-x-size", "Domain size along x-axis.", 2.0);
-   config.addEntry<Real>("domain-y-size", "Domain size along y-axis.", 2.0);
-
-   config.addEntry<Real>("sigma", "Sigma in exponential initial condition.", 1.0);
-
-   config.addEntry<Real>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
-   config.addEntry<Real>("final-time", "Final time of the simulation.", 0.012);
-   config.addEntry<bool>("verbose", "Verbose mode.", true);
-
-   return config;
-}
-
-template <typename Real>
-HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContainer &parameters) : xSize(parameters.getParameter<int>("grid-x-size")),
-                                                                                                 ySize(parameters.getParameter<int>("grid-y-size")),
-                                                                                                 xDomainSize(parameters.getParameter<Real>("domain-x-size")),
-                                                                                                 yDomainSize(parameters.getParameter<Real>("domain-y-size")),
-                                                                                                 sigma(parameters.getParameter<Real>("sigma")),
-                                                                                                 timeStep(parameters.getParameter<Real>("time-step")),
-                                                                                                 finalTime(parameters.getParameter<Real>("final-time")),
-                                                                                                 verbose(parameters.getParameter<bool>("verbose")) {}
+#pragma once
 
 /***
  * Grid parameters:
@@ -96,7 +33,6 @@ template <typename Real>
 template <typename Device>
 bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const
 {
-   // This is always an external storage for grid.
    TNL::Containers::Array<Real, Device> ux(params.xSize * params.ySize), // data at step u
                                         aux(params.xSize * params.ySize);// data at step u + 1
 
@@ -154,26 +90,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    return writeGNUPlot("data_final.txt", params, ux);
 }
 
-template <typename Real>
-template <typename Device>
-bool HeatmapSolver<Real>::writeGNUPlot(const std::string &filename,
-                                       const HeatmapSolver<Real>::Parameters &params,
-                                       const TNL::Containers::Array<Real, Device> &map) const
-{
-   std::ofstream out(filename, std::ios::out);
-
-   if (!out.is_open())
-      return false;
-
-   for (int j = 0; j < params.ySize; j++)
-      for (int i = 0; i < params.xSize; i++)
-         out << i << " " << j << " " << map[j * params.xSize + i] << std::endl;
-
-   return out.good();
-}
-
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
    using Real = double;
 
    auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
-- 
GitLab


From ae1ed75418fccffad116d205d2eb9d510a4a88a2 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Tue, 9 Nov 2021 16:26:09 +0100
Subject: [PATCH 09/29] Implement basic traversal algorithm. Fix indexing in
 ParallelFor benchmark

---
 .../HeatEquationGrid/HeatmapGrid/main.h       | 76 +++++++++++++++----
 .../HeatmapParallelFor/main.h                 |  2 +-
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 5f5e2cb4e..9debca43f 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -45,43 +45,57 @@ class Grid {
 
          refreshDimensionMaps();
       }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getDimension(Index index) const noexcept {
+         TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
+
+         return dimensions[index];
+      }
       /**
        * @param[in] indicies - A dimension index pack
        *
        * - TODO: - Add __cuda_callable__
        */
       template <typename... DimensionIndex,
-                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+                typename = std::enable_if_t<sizeof...(DimensionIndex) > 0>>
       std::vector<Index> getDimensions(DimensionIndex... indicies) const noexcept {
          std::vector<Index> dimensionIndicies{ indicies... }, result;
 
          std::transform(dimensionIndicies.begin(),
                         dimensionIndicies.end(),
                         std::back_inserter(result), [this](const Index& index) {
-            TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
-            TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
-
-            return this -> dimensions[index];
+            return this -> getDimension(index);
          });
 
          return result;
       }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept {
+         TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
+
+         return cumulativeDimensionMap(index);
+      }
       /**
        * @brief - Returns the number of entities of specific dimension
        */
       template <typename... DimensionIndex,
-                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>>
-      std::vector<Index> getEntitiesCount(const Index dimension...) const noexcept
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+                typename = std::enable_if_t<sizeof...(DimensionIndex) > 0>>
+      std::vector<Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept
       {
          std::vector<Index> dimensionIndicies{indicies...}, result;
 
          std::transform(dimensionIndicies.begin(),
                         dimensionIndicies.end(),
                         std::back_inserter(result), [this](const Index &index) {
-            TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
-            TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
-
-            return this->cumulativeDimensionMap[index];
+            return getEntitiesCount(index);
          });
 
          return result;
@@ -89,11 +103,33 @@ class Grid {
       /**
        * @brief - Traversers a specified dimension in parallel.
        */
-      template <typename Function>
-      void traverse(const Index dimension, Function function) const noexcept {
+      template <typename Function, typename... FunctionArgs>
+      void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept {
          auto entitiesCount = getEntitiesCount(dimension)[0];
+         auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index };
 
-
+         traverse(0, entitiesCount, identity, function, args...);
+      }
+      /**
+       * TODO: - A possibility of improvement, as it should be possible to specify more precise functions
+       *         to remove user knowledge of functionality
+       */
+      template <typename Function, typename IndexTransform, typename... FunctionArgs>
+      void traverse(const Index start,
+                    const Index end,
+                    IndexTransform transform,
+                    Function function,
+                    FunctionArgs... args) const noexcept {
+         TNL_ASSERT_LT(startIndex, endIndex, "Start index must be less than endIndex")
+
+         auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable
+         {
+            auto transformedIndex = transform(index);
+
+            function(transformedIndex, args...);
+         };
+
+         TNL::Algorithms::ParallelFor<Device>::exec(startIndex, endIndex, lambda, args...);
       }
    private:
       std::array<Index, Dimension> dimensions;
@@ -153,7 +189,15 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    grid.setDimensions(params.xSize, params.ySize);
 
-   auto entitiesCount = grid.getEntitiesCount(2)[0];
+   const Real hx = params.xDomainSize / (Real)grid.getDimension(0);
+   const Real hy = params.yDomainSize / (Real)grid.getDimension(1);
+   const Real hx_inv = 1 / (hx * hx);
+   const Real hy_inv = 1 / (hy * hy);
+
+   auto entitiesCount = grid.getEntitiesCount(0);
+   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+
+   auto uxView = ux.getView(), auxView = aux.getView();
 
    TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
                                         aux(entitiesCount);// data at step u + 1
@@ -163,6 +207,8 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    aux = 0;
 
 
+
+   return false;
 };
 
 int main(int argc, char *argv[]) {
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
index 4e84e0d8d..08658ea14 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
@@ -66,7 +66,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    auto next = [=] __cuda_callable__(int i, int j) mutable
    {
-      auto index = j * params.ySize + i;
+      auto index = j * params.xSize + i;
 
       auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
                        (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv;
-- 
GitLab


From 88a8c55137ef9ca42ac23c15cabb0c04d65f321b Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Tue, 9 Nov 2021 18:17:02 +0100
Subject: [PATCH 10/29] Implement fixes to correctly represent dimension.
 Update code to use TNL Containers

---
 .../HeatEquationGrid/HeatmapGrid/main.h       | 57 ++++++++++---------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 9debca43f..57a96a6d4 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -10,6 +10,7 @@
 #include <TNL/Config/parseCommandLine.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Array.h>
+#include <TNL/Containers/StaticArray.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
 #pragma once
@@ -61,15 +62,12 @@ class Grid {
        */
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
-                typename = std::enable_if_t<sizeof...(DimensionIndex) > 0>>
-      std::vector<Index> getDimensions(DimensionIndex... indicies) const noexcept {
-         std::vector<Index> dimensionIndicies{ indicies... }, result;
+                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+      TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indicies) const noexcept {
+         TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> result{indicies...};
 
-         std::transform(dimensionIndicies.begin(),
-                        dimensionIndicies.end(),
-                        std::back_inserter(result), [this](const Index& index) {
-            return this -> getDimension(index);
-         });
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this -> getDimension(result[i]);
 
          return result;
       }
@@ -87,16 +85,13 @@ class Grid {
        */
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
-                typename = std::enable_if_t<sizeof...(DimensionIndex) > 0>>
-      std::vector<Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept
+                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+      TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept
       {
-         std::vector<Index> dimensionIndicies{indicies...}, result;
+         TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> result{indicies...};
 
-         std::transform(dimensionIndicies.begin(),
-                        dimensionIndicies.end(),
-                        std::back_inserter(result), [this](const Index &index) {
-            return getEntitiesCount(index);
-         });
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this -> getEntitiesCount(result[i]);
 
          return result;
       }
@@ -106,7 +101,7 @@ class Grid {
       template <typename Function, typename... FunctionArgs>
       void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept {
          auto entitiesCount = getEntitiesCount(dimension)[0];
-         auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index };
+         auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index; };
 
          traverse(0, entitiesCount, identity, function, args...);
       }
@@ -120,7 +115,7 @@ class Grid {
                     IndexTransform transform,
                     Function function,
                     FunctionArgs... args) const noexcept {
-         TNL_ASSERT_LT(startIndex, endIndex, "Start index must be less than endIndex")
+         TNL_ASSERT_LT(start, end, "Start index must be less than endIndex")
 
          auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable
          {
@@ -129,10 +124,10 @@ class Grid {
             function(transformedIndex, args...);
          };
 
-         TNL::Algorithms::ParallelFor<Device>::exec(startIndex, endIndex, lambda, args...);
+         TNL::Algorithms::ParallelFor<Device>::exec(start, end, lambda, args...);
       }
    private:
-      std::array<Index, Dimension> dimensions;
+      TNL::Containers::StaticArray<Dimension, Index> dimensions;
       /**
        * @brief - A dimension map is a store for dimension limits over all combinations of basis.
        *          First, (n choose 0) elements will contain the count of 0 dimension elements
@@ -147,11 +142,11 @@ class Grid {
        *
        * @warning - The ordering of is lexigraphical.
        */
-      std::array<Index, 1 << Dimension> dimensionMap;
+      TNL::Containers::StaticArray<1 << Dimension, Index> dimensionMap;
       /**
        * @brief - A cumulative map over dimensions.
        */
-      std::array<Index, Dimension + 1> cumulativeDimensionMap;
+      TNL::Containers::StaticArray<Dimension + 1, Index> cumulativeDimensionMap;
       /**
        * @brief - Fills dimensions map for N-dimensional Grid.
        *
@@ -161,7 +156,8 @@ class Grid {
          std::array<bool, Dimension> combinationBuffer = {};
          std::size_t j = 0;
 
-         std::fill(cumulativeDimensionMap.begin(), cumulativeDimensionMap.end(), 0);
+         for (std::size_t i = 0; i < Dimension + 1; i++)
+            cumulativeDimensionMap[i] = 0;
 
          for (std::size_t i = 0; i <= Dimension; i++) {
             std::fill(combinationBuffer.begin(), combinationBuffer.end(), false);
@@ -170,8 +166,15 @@ class Grid {
             do {
                int result = 1;
 
-               for (std::size_t i = 0; i < combinationBuffer.size(); i++)
-                  result *= combinationBuffer[i] ? dimensions[i] : dimensions[i] + 1;
+               for (std::size_t i = 0; i < combinationBuffer.size(); i++) {
+                  // Dimensions are stored in the least significant order.
+                  // For example, the list of dimensions [1, 2, 3] is the size along [x, y, z] dimensions
+                  // Combination generator follow lexical order, so the first element of the sequence is [0, 0, ..., 0, 1].
+                  // That's why we need to reverse index to obtain valid dimension.
+                  auto reversedIndex = combinationBuffer.size() - 1 - i;
+
+                  result *= combinationBuffer[reversedIndex] ? dimensions[i] : dimensions[i] + 1;
+               }
 
                dimensionMap[j] = result;
                cumulativeDimensionMap[i] += result;
@@ -197,11 +200,11 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    auto entitiesCount = grid.getEntitiesCount(0);
    auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
 
-   auto uxView = ux.getView(), auxView = aux.getView();
-
    TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
                                         aux(entitiesCount);// data at step u + 1
 
+   auto uxView = ux.getView(), auxView = aux.getView();
+
    // Invalidate ux/aux
    ux = 0;
    aux = 0;
-- 
GitLab


From 40d87ac990f86c6636a2e4f8a795dc54d6b1941f Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 11 Nov 2021 11:50:03 +0100
Subject: [PATCH 11/29] Implement traversal methods

---
 .../HeatmapGrid/CMakeLists.txt                |   8 +-
 .../HeatEquationGrid/HeatmapGrid/main.h       | 140 +++++++++++++-----
 2 files changed, 104 insertions(+), 44 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
index a4c44b591..6649d0fb2 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
@@ -1,5 +1,5 @@
-#if (BUILD_CUDA)
-#   CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu )
-#ELSE()
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu )
+ELSE()
    add_executable( benchmark_heat_grid main.h main.cpp )
-#ENDIF()
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 57a96a6d4..0f9d32fb0 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -4,6 +4,7 @@
 #include <numeric>
 #include <type_traits>
 #include <array>
+#include <bitset>
 
 #include "../Base/HeatmapSolver.h"
 
@@ -11,6 +12,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Array.h>
 #include <TNL/Containers/StaticArray.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
 #pragma once
@@ -20,6 +22,29 @@ template<bool ...> struct bool_pack {};
 template <bool... Bs>
 using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>;
 
+template<int Dimension,
+         typename Index,
+         typename = std::enable_if_t<(Dimension > 0)>,
+         typename = std::enable_if_t<std::is_integral<Index>::value>>
+using Container = TNL::Containers::StaticArray<Dimension, Index>;
+
+template<int Dimension,
+         typename Index,
+         typename = std::enable_if_t<(Dimension > 0)>,
+         typename = std::enable_if_t<std::is_integral<Index>::value>>
+class GridEntity {
+   public:
+      GridEntity(const Container<Dimension, Index>& origin,
+                 const Container<Dimension, Index>& dimensions,
+                 const std::bitset<Dimension>& direction): origin(origin),
+                                                           dimensions(dimensions),
+                                                           direction(direction) {};
+   private:
+      Container<Dimension, Index> origin;
+      Container<Dimension, Index> dimensions;
+      std::bitset<Dimension> direction;
+};
+
 template<int Dimension,
          typename Index,
          typename Device = TNL::Devices::Host,
@@ -31,7 +56,9 @@ class Grid {
 
       /**
        *  @brief - Specifies dimensions of the grid
-       *  @param[in] dimensions - A parameter pack, which specifies edges count in the specific dimension
+       *  @param[in] dimensions - A parameter pack, which specifies edges count in the specific dimension.
+       *                          Most significant dimension is in the beginning of the list.
+       *                          Least significant dimension is in the end of the list
        */
       template <typename... Dimensions,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, Dimensions>::value...>::value>,
@@ -63,7 +90,7 @@ class Grid {
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
                 typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indicies) const noexcept {
+      Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indicies) const noexcept {
          TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> result{indicies...};
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
@@ -86,9 +113,8 @@ class Grid {
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
                 typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept
-      {
-         TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> result{indicies...};
+      Container<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{indicies...};
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
             result[i] = this -> getEntitiesCount(result[i]);
@@ -96,38 +122,81 @@ class Grid {
          return result;
       }
       /**
-       * @brief - Traversers a specified dimension in parallel.
+       * @brief - Traversers all elements in the grid
        */
       template <typename Function, typename... FunctionArgs>
-      void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept {
-         auto entitiesCount = getEntitiesCount(dimension)[0];
-         auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index; };
+      void traverseAll(Function function, FunctionArgs... args) const noexcept {
+         auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable {
+            function(index, args...);
+         };
 
-         traverse(0, entitiesCount, identity, function, args...);
+         TNL::Algorithms::ParallelFor<Device>::exec(0, cumulativeDimensionMap[0], lambda, args...);
       }
       /**
-       * TODO: - A possibility of improvement, as it should be possible to specify more precise functions
-       *         to remove user knowledge of functionality
+       * @brief - Traverses a grid from start index to end index.
+       *
+       * @param[in] start - a start index of point
+       * @param[in] end - an end index of point
+       * @param[in] directions - A pack of boolean vector flags with the size of the dimension.
+       *   For example, let's have the 3-dimensional grid.
+       *     A pack {false, false, false} will call function for all points
+       *     A pack {true, false, false} will call function for edges directed over dimension at index of true
+       *     A pack {true, true, false} will call function for faces directed over dimension at index of true
+       *     A pack {true, true, true} will call function for cells directed over
+       *
        */
-      template <typename Function, typename IndexTransform, typename... FunctionArgs>
-      void traverse(const Index start,
-                    const Index end,
-                    IndexTransform transform,
+      template<typename Function, typename... FunctionArgs>
+      void traverse(const Container<Dimension, Index>& start,
+                    const Container<Dimension, Index>& end,
+                    const TNL::Containers::Vector<std::bitset<Dimension>>& directions,
                     Function function,
                     FunctionArgs... args) const noexcept {
-         TNL_ASSERT_LT(start, end, "Start index must be less than endIndex")
+         // TODO: - This will overflow for higher dimensions
+         Index startCollapsedIndex = 0;
+         Index endCollapsedIndex = 0;
 
-         auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable
-         {
-            auto transformedIndex = transform(index);
+         // TODO: - For higher dimensions, this will overflow.
+         Container<Dimension, Index> multipliers = 1;
 
-            function(transformedIndex, args...);
+         for (Index i = 0; i < Dimension; i++) {
+            for (Index j = i + 1; j < Dimension; j++)
+               multipliers[i] *= dimensions[j];
+
+            startCollapsedIndex += start[i] * multipliers[i];
+            endCollapsedIndex += end[i] * multipliers[i];
+         }
+
+         // TODO: - Improve message formatting
+         TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start..<end]")
+         TNL_ASSERT_LE(endCollapsedIndex, this -> cumulativeDimensionMap[0], "End must be less, than amount of points in grid");
+
+         auto dimensions = this -> dimensions;
+
+         auto outerFunction = [=] __cuda_callable__ (const Index i, FunctionArgs... args) mutable {
+            Index dim = Dimension - 1;
+
+            while (i != 0) {
+               Index newIndex = start[dim] + i, dimension = dimensions[dim];
+               Index quotient = newIndex / dimension;
+               Index reminder = newIndex - (dimension * quotient);
+
+               start[dim] = reminder;
+               i = quotient;
+
+               dim -= 1;
+            }
+
+            for (const auto& direction: directions) {
+               GridEntity<Dimension, Index> entity = { start, dimensions, direction };
+
+               function(entity, args...);
+            }
          };
 
-         TNL::Algorithms::ParallelFor<Device>::exec(start, end, lambda, args...);
+         TNL::Algorithms::ParallelFor<Device>::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...);
       }
    private:
-      TNL::Containers::StaticArray<Dimension, Index> dimensions;
+      Container<Dimension, Index> dimensions;
       /**
        * @brief - A dimension map is a store for dimension limits over all combinations of basis.
        *          First, (n choose 0) elements will contain the count of 0 dimension elements
@@ -136,17 +205,17 @@ class Grid {
        *
        *          For example, let's have a 3-d grid, then the map indexing will be the next:
        *            0 - 0 - count of vertices
-       *            1, 2, 3 - count of edges in x, y, z plane
-       *            4, 5, 6 - count of faces in xy, xz, yz plane
-       *            7 - count of cells in xyz plane
+       *            1, 2, 3 - count of edges in z, y, x plane
+       *            4, 5, 6 - count of faces in yz, xz, xy plane
+       *            7 - count of cells in z y x plane
        *
        * @warning - The ordering of is lexigraphical.
        */
-      TNL::Containers::StaticArray<1 << Dimension, Index> dimensionMap;
+      Container<1 << Dimension, Index> dimensionMap;
       /**
        * @brief - A cumulative map over dimensions.
        */
-      TNL::Containers::StaticArray<Dimension + 1, Index> cumulativeDimensionMap;
+      Container<Dimension + 1, Index> cumulativeDimensionMap;
       /**
        * @brief - Fills dimensions map for N-dimensional Grid.
        *
@@ -166,15 +235,8 @@ class Grid {
             do {
                int result = 1;
 
-               for (std::size_t i = 0; i < combinationBuffer.size(); i++) {
-                  // Dimensions are stored in the least significant order.
-                  // For example, the list of dimensions [1, 2, 3] is the size along [x, y, z] dimensions
-                  // Combination generator follow lexical order, so the first element of the sequence is [0, 0, ..., 0, 1].
-                  // That's why we need to reverse index to obtain valid dimension.
-                  auto reversedIndex = combinationBuffer.size() - 1 - i;
-
-                  result *= combinationBuffer[reversedIndex] ? dimensions[i] : dimensions[i] + 1;
-               }
+               for (std::size_t k = 0; k < combinationBuffer.size(); k++)
+                  result *= combinationBuffer[k] ? dimensions[k] : dimensions[k] + 1;
 
                dimensionMap[j] = result;
                cumulativeDimensionMap[i] += result;
@@ -217,9 +279,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 int main(int argc, char *argv[]) {
    Grid<3, int> grid;
 
-   grid.setDimensions(1, 2, 3);
-
-
+   grid.setDimensions(3, 2, 1);
 
    return 0;
 }
-- 
GitLab


From 8e005e6eadbb51ba8e8c0e0de4c7ba5af30973e8 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 11 Nov 2021 15:20:24 +0100
Subject: [PATCH 12/29] Work on fixes after testing the implementation of
 traversion. Revert building of all benchmarks

---
 src/Benchmarks/CMakeLists.txt                 |  21 +--
 .../HeatEquationGrid/HeatmapGrid/main.h       | 124 +++++++++++++-----
 2 files changed, 95 insertions(+), 50 deletions(-)

diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 4ef9404fc..6fa019081 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -1,6 +1,5 @@
-#add_subdirectory( HeatEquation )
+add_subdirectory( HeatEquation )
 add_subdirectory( HeatEquationGrid )
-<<<<<<< HEAD
 add_subdirectory( BLAS )
 add_subdirectory( NDArray )
 add_subdirectory( SpMV )
@@ -9,21 +8,3 @@ add_subdirectory( LinearSolvers )
 add_subdirectory( ODESolvers )
 add_subdirectory( Sorting )
 add_subdirectory( Traversers )
-=======
-#add_subdirectory( BLAS )
-#add_subdirectory( NDArray )
-#add_subdirectory( SpMV )
-#add_subdirectory( DistSpMV )
-#add_subdirectory( LinearSolvers )
-#add_subdirectory( ODESolvers )
-#add_subdirectory( Sorting )
-#add_subdirectory( Traversers )
-
-set( headers
-         Benchmarks.h
-         FunctionTimer.h
-         Logging.h
-)
-
-install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
->>>>>>> 049219f08 (Implement dimension set/get)
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 0f9d32fb0..89b8253f6 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -12,7 +12,6 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Array.h>
 #include <TNL/Containers/StaticArray.h>
-#include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
 #pragma once
@@ -34,15 +33,59 @@ template<int Dimension,
          typename = std::enable_if_t<std::is_integral<Index>::value>>
 class GridEntity {
    public:
-      GridEntity(const Container<Dimension, Index>& origin,
-                 const Container<Dimension, Index>& dimensions,
-                 const std::bitset<Dimension>& direction): origin(origin),
-                                                           dimensions(dimensions),
-                                                           direction(direction) {};
+      __cuda_callable__ inline
+      GridEntity(const Container<Dimension, Index>& startPosition,
+                 const Index& startOffset,
+                 const Index& offset,
+                 const Container<Dimension, Index>& dimensions):
+             //    const std::bitset<Dimension> direction) :
+                 startPosition(startPosition),
+                 startOffset(startOffset),
+                 position({}),
+                 offset(offset),
+                 dimensions(dimensions) {}//,
+                 //direction(direction) {};
+      __cuda_callable__
+      ~GridEntity() {};
+
+      __cuda_callable__ inline
+      Container<Dimension, Index> getPosition() noexcept {
+         if (position.getSize() == 0) {
+            auto position = startPosition;
+            Index tmpOffset = offset;
+
+            Index dim = Dimension - 1;
+
+            while (offset != 0) {
+               Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim];
+               Index quotient = newIndex / dimension;
+               Index reminder = newIndex - (dimension * quotient);
+
+               position[dim] = reminder;
+               tmpOffset = quotient;
+
+               dim -= 1;
+            }
+
+            this -> position = position;
+         }
+
+         return position;
+      }
+
+      __cuda_callable__ inline
+      Index getIndex() const noexcept {
+         return startOffset + offset;
+      }
    private:
-      Container<Dimension, Index> origin;
-      Container<Dimension, Index> dimensions;
-      std::bitset<Dimension> direction;
+      const Container<Dimension, Index> startPosition;
+      const Index startOffset;
+
+      Container<Dimension, Index> position;
+      const Index offset;
+
+      const Container<Dimension, Index> dimensions;
+    //  const std::bitset<Dimension> direction;
 };
 
 template<int Dimension,
@@ -84,8 +127,6 @@ class Grid {
       }
       /**
        * @param[in] indicies - A dimension index pack
-       *
-       * - TODO: - Add __cuda_callable__
        */
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
@@ -148,7 +189,7 @@ class Grid {
       template<typename Function, typename... FunctionArgs>
       void traverse(const Container<Dimension, Index>& start,
                     const Container<Dimension, Index>& end,
-                    const TNL::Containers::Vector<std::bitset<Dimension>>& directions,
+                    //const TNL::Containers::Array<std::bitset<Dimension>, Device>& directions,
                     Function function,
                     FunctionArgs... args) const noexcept {
          // TODO: - This will overflow for higher dimensions
@@ -167,30 +208,22 @@ class Grid {
          }
 
          // TODO: - Improve message formatting
-         TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start..<end]")
+         TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start..<end]");
          TNL_ASSERT_LE(endCollapsedIndex, this -> cumulativeDimensionMap[0], "End must be less, than amount of points in grid");
 
          auto dimensions = this -> dimensions;
+        // auto directionsView = directions.getConstView();
 
-         auto outerFunction = [=] __cuda_callable__ (const Index i, FunctionArgs... args) mutable {
-            Index dim = Dimension - 1;
+         auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable {
+            GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, offset, dimensions };
 
-            while (i != 0) {
-               Index newIndex = start[dim] + i, dimension = dimensions[dim];
-               Index quotient = newIndex / dimension;
-               Index reminder = newIndex - (dimension * quotient);
+            function(entity, args...);
 
-               start[dim] = reminder;
-               i = quotient;
+           // for (Index j = 0; j < directionsView.getSize(); j++) {
+            //   GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, j, dimensions, directionsView[i] };
 
-               dim -= 1;
-            }
 
-            for (const auto& direction: directions) {
-               GridEntity<Dimension, Index> entity = { start, dimensions, direction };
-
-               function(entity, args...);
-            }
+            //}
          };
 
          TNL::Algorithms::ParallelFor<Device>::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...);
@@ -271,15 +304,46 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    ux = 0;
    aux = 0;
 
+  /* auto init = [=] __cuda_callable__(const auto& entity) mutable {
+      // auto index = j * params.xSize + i;
 
+      //auto x = i * hx - params.xDomainSize / 2;
+      //auto y = j * hy - params.yDomainSize / 2;
+
+      //uxView[index] = exp(params.sigma * (x * x + y * y));
+   };*/
+
+  /* grid.traverse({ 1, 1 },
+                 { grid.getDimension(0) - 1, grid.getDimension(1) - 1 },
+                 { { 0b00 } },
+                 init);*/
 
    return false;
 };
 
 int main(int argc, char *argv[]) {
-   Grid<3, int> grid;
+   Grid<3, int, TNL::Devices::Cuda> grid;
+
+   grid.setDimensions(3, 3, 3);
+
+   auto fn = [=] __cuda_callable__ (int index) {
+      //printf("%d\n", index);
+   };
+
+   grid.traverseAll(fn);
+
+   auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) {
+     // entity.doSomething();
+      //printf("%d\n", entity.getPosition());
+      //printf("%d \n", entity.getIndex());
+   };
+
+   //TNL::Containers::Array<std::bitset<3>> directions;
+
+   //directions.resize(1);
+   //directions[0] = { 0b000 };
 
-   grid.setDimensions(3, 2, 1);
+   grid.traverse({ 0, 0, 0 }, {3, 3, 3}, fn_entity);
 
    return 0;
 }
-- 
GitLab


From 8801e9738c87f98e57935fbfe0c16cfa53bf8bba Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 11 Nov 2021 16:35:24 +0100
Subject: [PATCH 13/29] Fix traversal by coordinates. Change idea of grid
 dimensions size. Now it is set as the amount of points

---
 .../HeatEquationGrid/HeatmapGrid/main.h       | 54 +++++++++----------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 89b8253f6..a2974a12f 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -33,30 +33,32 @@ template<int Dimension,
          typename = std::enable_if_t<std::is_integral<Index>::value>>
 class GridEntity {
    public:
-      __cuda_callable__ inline
+      __cuda_callable__ inline explicit
       GridEntity(const Container<Dimension, Index>& startPosition,
                  const Index& startOffset,
                  const Index& offset,
-                 const Container<Dimension, Index>& dimensions):
-             //    const std::bitset<Dimension> direction) :
+                 const Container<Dimension, Index>& dimensions,
+                 const Container<Dimension, bool> direction) :
                  startPosition(startPosition),
                  startOffset(startOffset),
-                 position({}),
                  offset(offset),
-                 dimensions(dimensions) {}//,
-                 //direction(direction) {};
+                 dimensions(dimensions),
+                 direction(direction) {
+        this -> position = -1;
+      };
+
       __cuda_callable__
       ~GridEntity() {};
 
       __cuda_callable__ inline
       Container<Dimension, Index> getPosition() noexcept {
-         if (position.getSize() == 0) {
+         if (position[0] == -1) {
             auto position = startPosition;
             Index tmpOffset = offset;
 
             Index dim = Dimension - 1;
 
-            while (offset != 0) {
+            while (tmpOffset) {
                Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim];
                Index quotient = newIndex / dimension;
                Index reminder = newIndex - (dimension * quotient);
@@ -85,7 +87,7 @@ class GridEntity {
       const Index offset;
 
       const Container<Dimension, Index> dimensions;
-    //  const std::bitset<Dimension> direction;
+      const Container<Dimension, bool> direction;
 };
 
 template<int Dimension,
@@ -99,7 +101,7 @@ class Grid {
 
       /**
        *  @brief - Specifies dimensions of the grid
-       *  @param[in] dimensions - A parameter pack, which specifies edges count in the specific dimension.
+       *  @param[in] dimensions - A parameter pack, which specifies points count in the specific dimension.
        *                          Most significant dimension is in the beginning of the list.
        *                          Least significant dimension is in the end of the list
        */
@@ -110,6 +112,7 @@ class Grid {
          Index i = 0;
 
          for (auto x: { dimensions... }) {
+            TNL_ASSERT_GT(x, 0, "Dimension must be positive");
             this -> dimensions[i] = x;
             i++;
          }
@@ -189,7 +192,7 @@ class Grid {
       template<typename Function, typename... FunctionArgs>
       void traverse(const Container<Dimension, Index>& start,
                     const Container<Dimension, Index>& end,
-                    //const TNL::Containers::Array<std::bitset<Dimension>, Device>& directions,
+                    const TNL::Containers::Array<Container<Dimension, bool>, Device>& directions,
                     Function function,
                     FunctionArgs... args) const noexcept {
          // TODO: - This will overflow for higher dimensions
@@ -212,18 +215,14 @@ class Grid {
          TNL_ASSERT_LE(endCollapsedIndex, this -> cumulativeDimensionMap[0], "End must be less, than amount of points in grid");
 
          auto dimensions = this -> dimensions;
-        // auto directionsView = directions.getConstView();
+         auto directionsView = directions.getConstView();
 
          auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable {
-            GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, offset, dimensions };
-
-            function(entity, args...);
-
-           // for (Index j = 0; j < directionsView.getSize(); j++) {
-            //   GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, j, dimensions, directionsView[i] };
+            for (Index j = 0; j < directionsView.getSize(); j++) {
+               GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, offset, dimensions, directionsView[j] };
 
-
-            //}
+               function(entity, args...);
+            }
          };
 
          TNL::Algorithms::ParallelFor<Device>::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...);
@@ -269,7 +268,7 @@ class Grid {
                int result = 1;
 
                for (std::size_t k = 0; k < combinationBuffer.size(); k++)
-                  result *= combinationBuffer[k] ? dimensions[k] : dimensions[k] + 1;
+                  result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k];
 
                dimensionMap[j] = result;
                cumulativeDimensionMap[i] += result;
@@ -322,7 +321,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 };
 
 int main(int argc, char *argv[]) {
-   Grid<3, int, TNL::Devices::Cuda> grid;
+   Grid<3, int, TNL::Devices::Host> grid;
 
    grid.setDimensions(3, 3, 3);
 
@@ -333,17 +332,12 @@ int main(int argc, char *argv[]) {
    grid.traverseAll(fn);
 
    auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) {
-     // entity.doSomething();
-      //printf("%d\n", entity.getPosition());
-      //printf("%d \n", entity.getIndex());
+      printf("%d \n", entity.getIndex());
    };
 
-   //TNL::Containers::Array<std::bitset<3>> directions;
-
-   //directions.resize(1);
-   //directions[0] = { 0b000 };
+   Container<3, int> direction { 0, 0, 0};
 
-   grid.traverse({ 0, 0, 0 }, {3, 3, 3}, fn_entity);
+   grid.traverse({ 1, 1, 1 }, { 2, 2, 2 }, { direction }, fn_entity);
 
    return 0;
 }
-- 
GitLab


From 394f4f6e56554562966d9c0149555e7640ac4bc6 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Wed, 17 Nov 2021 14:19:37 +0100
Subject: [PATCH 14/29] Finalise n-dimensional grid

---
 .../HeatEquationGrid/HeatmapGrid/main.h       | 224 ++++++++++++------
 1 file changed, 157 insertions(+), 67 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index a2974a12f..f9fe564a5 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -33,33 +33,45 @@ template<int Dimension,
          typename = std::enable_if_t<std::is_integral<Index>::value>>
 class GridEntity {
    public:
-      __cuda_callable__ inline explicit
-      GridEntity(const Container<Dimension, Index>& startPosition,
-                 const Index& startOffset,
-                 const Index& offset,
-                 const Container<Dimension, Index>& dimensions,
-                 const Container<Dimension, bool> direction) :
-                 startPosition(startPosition),
-                 startOffset(startOffset),
-                 offset(offset),
-                 dimensions(dimensions),
-                 direction(direction) {
+      struct Context {
+         public:
+            const int startOffset;
+
+            const Container<Dimension, Index> startPosition;
+            const Container<Dimension, Index> traverseRectDimensions;
+            const Container<Dimension, bool> direction;
+
+            Context(const int startOffset,
+                    const Container<Dimension, Index> startPosition,
+                    const Container<Dimension, Index> traverseRectDimensions,
+                    const Container<Dimension, bool> direction):
+                    startOffset(startOffset),
+                    startPosition(startPosition),
+                    traverseRectDimensions(traverseRectDimensions),
+                    direction(direction) {}
+      };
+
+      __cuda_callable__
+      inline explicit GridEntity(const Context& context,
+                                 const Index& vertexIndex) :
+                                 vertexIndex(vertexIndex),
+                                 context(context) {
         this -> position = -1;
       };
 
       __cuda_callable__
       ~GridEntity() {};
 
-      __cuda_callable__ inline
-      Container<Dimension, Index> getPosition() noexcept {
+      __cuda_callable__
+      inline Container<Dimension, Index> getPosition() noexcept {
          if (position[0] == -1) {
-            auto position = startPosition;
-            Index tmpOffset = offset;
+            Container<Dimension, Index> position = 0;
+            Index tmpOffset = vertexIndex - context.startOffset;
 
             Index dim = Dimension - 1;
 
             while (tmpOffset) {
-               Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim];
+               Index newIndex = position[dim] + tmpOffset, dimension = context.traverseRectDimensions[dim];
                Index quotient = newIndex / dimension;
                Index reminder = newIndex - (dimension * quotient);
 
@@ -69,25 +81,22 @@ class GridEntity {
                dim -= 1;
             }
 
-            this -> position = position;
+            for (Index i = 0; i < Dimension; i++)
+               this -> position[i] = position[i] + context.startPosition[i];
          }
 
          return position;
       }
 
-      __cuda_callable__ inline
-      Index getIndex() const noexcept {
-         return startOffset + offset;
+      __cuda_callable__
+      inline Index getIndex() const noexcept {
+         return vertexIndex;
       }
    private:
-      const Container<Dimension, Index> startPosition;
-      const Index startOffset;
-
       Container<Dimension, Index> position;
-      const Index offset;
 
-      const Container<Dimension, Index> dimensions;
-      const Container<Dimension, bool> direction;
+      const Index vertexIndex;
+      const Context& context;
 };
 
 template<int Dimension,
@@ -123,7 +132,7 @@ class Grid {
        * @param[in] index - index of dimension
        */
       inline __cuda_callable__ Index getDimension(Index index) const noexcept {
-         TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
          TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
 
          return dimensions[index];
@@ -135,7 +144,7 @@ class Grid {
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
                 typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
       Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indicies) const noexcept {
-         TNL::Containers::StaticArray<sizeof...(DimensionIndex), Index> result{indicies...};
+         Container<sizeof...(DimensionIndex), Index> result{indicies...};
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
             result[i] = this -> getDimension(result[i]);
@@ -146,7 +155,7 @@ class Grid {
        * @param[in] index - index of dimension
        */
       inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept {
-         TNL_ASSERT_GT(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
          TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
 
          return cumulativeDimensionMap(index);
@@ -165,6 +174,29 @@ class Grid {
 
          return result;
       }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getEndIndex(Index index) const noexcept {
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension");
+
+         return this -> getDimension(index) - 1;
+      }
+       /**
+       * @brief - Returns the last index of specific dimensions
+       */
+      template <typename... DimensionIndex,
+         typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+         typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+         Container<sizeof...(DimensionIndex), Index> getEndIndicies(DimensionIndex... indicies) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{ indicies... };
+
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this->getEndIndex(result[i]);
+
+         return result;
+      }
       /**
        * @brief - Traversers all elements in the grid
        */
@@ -190,42 +222,41 @@ class Grid {
        *
        */
       template<typename Function, typename... FunctionArgs>
-      void traverse(const Container<Dimension, Index>& start,
-                    const Container<Dimension, Index>& end,
+      void traverse(const Container<Dimension, Index>& firstVertex,
+                    const Container<Dimension, Index>& secondVertex,
                     const TNL::Containers::Array<Container<Dimension, bool>, Device>& directions,
                     Function function,
                     FunctionArgs... args) const noexcept {
-         // TODO: - This will overflow for higher dimensions
-         Index startCollapsedIndex = 0;
-         Index endCollapsedIndex = 0;
-
-         // TODO: - For higher dimensions, this will overflow.
-         Container<Dimension, Index> multipliers = 1;
+         Index verticesCount = 1, offset = 0;
+         Container<Dimension, Index> traverseRectDimensions, offsetPosition, multipliers;
 
          for (Index i = 0; i < Dimension; i++) {
-            for (Index j = i + 1; j < Dimension; j++)
-               multipliers[i] *= dimensions[j];
+            traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1;
+            verticesCount *= traverseRectDimensions[i];
+            offsetPosition[i] = std::min(firstVertex[i], secondVertex[i]);
 
-            startCollapsedIndex += start[i] * multipliers[i];
-            endCollapsedIndex += end[i] * multipliers[i];
-         }
+            multipliers[i] = i == 0 ? 1 : multipliers[i - 1] * dimensions[i];
+            offset += offsetPosition[i] * multipliers[i];
 
-         // TODO: - Improve message formatting
-         TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start..<end]");
-         TNL_ASSERT_LE(endCollapsedIndex, this -> cumulativeDimensionMap[0], "End must be less, than amount of points in grid");
+            TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range");
+            TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range");
+         }
 
-         auto dimensions = this -> dimensions;
-         auto directionsView = directions.getConstView();
+         using Context = typename GridEntity<Dimension, Index>::Context;
 
-         auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable {
-            for (Index j = 0; j < directionsView.getSize(); j++) {
-               GridEntity<Dimension, Index> entity{ start, startCollapsedIndex, offset, dimensions, directionsView[j] };
+         auto outerFunction = [function] __cuda_callable__(Index offset, const Context& context, FunctionArgs... args) mutable {
+            GridEntity<Dimension, Index> entity{ context, context.startOffset + offset };
 
-               function(entity, args...);
-            }
+            function(entity, args...);
          };
 
-         TNL::Algorithms::ParallelFor<Device>::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...);
+         Index lowerBound = 0, upperBound = verticesCount;
+
+         for (Index i = 0; i < directions.getSize(); i++) {
+            const Context context{ offset, offsetPosition, traverseRectDimensions, directions[i] };
+
+            TNL::Algorithms::ParallelFor<Device>::exec(lowerBound, upperBound, outerFunction, context, args...);
+         }
       }
    private:
       Container<Dimension, Index> dimensions;
@@ -303,23 +334,81 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    ux = 0;
    aux = 0;
 
-  /* auto init = [=] __cuda_callable__(const auto& entity) mutable {
-      // auto index = j * params.xSize + i;
+   auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable {
+      auto position = entity.getPosition();
+      auto index = entity.getIndex();
+
+      auto x = position[0] * hx - params.xDomainSize / 2;
+      auto y = position[1] * hx - params.yDomainSize / 2;
+
+      uxView[index] = exp(params.sigma * (x * x + y * y));
+   };
+
+   const Container<2, bool> direction{ false, false };
+
+   grid.traverse({ 1, 1 },
+                 { grid.getEndIndex(0), grid.getEndIndex(1) },
+                 { direction },
+                 init);
+
+   if (!writeGNUPlot("data.txt", params, ux))
+      return false;
+
+   auto xDimension = grid.getDimension(0);
+
+   auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable {
+      auto index = entity.getIndex();
+
+      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                       (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv;
+   };
+
+   auto update = [=] __cuda_callable__(int i) mutable {
+      uxView[i] += auxView[i] * timestep;
+   };
+
+   Real start = 0;
 
-      //auto x = i * hx - params.xDomainSize / 2;
-      //auto y = j * hy - params.yDomainSize / 2;
+   while (start < params.finalTime) {
+      grid.traverse({ 1, 1 },
+                    { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
+                    { direction },
+                    next);
 
-      //uxView[index] = exp(params.sigma * (x * x + y * y));
-   };*/
+      grid.traverseAll(update);
 
-  /* grid.traverse({ 1, 1 },
-                 { grid.getDimension(0) - 1, grid.getDimension(1) - 1 },
-                 { { 0b00 } },
-                 init);*/
+      start += timestep;
+   }
 
-   return false;
+   return writeGNUPlot("data_final.txt", params, ux);
 };
 
+int main(int argc, char* argv[]) {
+   using Real = double;
+
+   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
+
+   TNL::Config::ParameterContainer parameters;
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
+
+   auto device = parameters.getParameter<TNL::String>("device");
+   auto params = HeatmapSolver<Real>::Parameters(parameters);
+
+   HeatmapSolver<Real> solver;
+
+   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+      return EXIT_FAILURE;
+
+#ifdef HAVE_CUDA
+   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+      return EXIT_FAILURE;
+#endif
+
+   return EXIT_SUCCESS;
+}
+
+/*
 int main(int argc, char *argv[]) {
    Grid<3, int, TNL::Devices::Host> grid;
 
@@ -332,12 +421,13 @@ int main(int argc, char *argv[]) {
    grid.traverseAll(fn);
 
    auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) {
-      printf("%d \n", entity.getIndex());
+      printf("%d %d %d \n", entity.getPosition()[0], entity.getPosition()[1], entity.getPosition()[2]);
    };
 
-   Container<3, int> direction { 0, 0, 0};
+   Container<3, int> direction { 0, 0, 0 };
 
-   grid.traverse({ 1, 1, 1 }, { 2, 2, 2 }, { direction }, fn_entity);
+   grid.traverse({ 1, 1, 1 }, { 1, 1, 1 }, { direction }, fn_entity);
 
    return 0;
 }
+*/
-- 
GitLab


From 6a3159aa498a421318889e44ced4a101794a19a4 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Wed, 17 Nov 2021 14:45:13 +0100
Subject: [PATCH 15/29] Fix boundaries of getEndIndex

---
 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index f9fe564a5..6506e8181 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -347,7 +347,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    const Container<2, bool> direction{ false, false };
 
    grid.traverse({ 1, 1 },
-                 { grid.getEndIndex(0), grid.getEndIndex(1) },
+                 { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
                  { direction },
                  init);
 
-- 
GitLab


From 84aeba4a2f13ca795cfca46443c7dd3bdd7c7186 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Wed, 17 Nov 2021 14:47:05 +0100
Subject: [PATCH 16/29] Fix typo: indicies -> indices

---
 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
index 6506e8181..40d7bef31 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
@@ -138,13 +138,13 @@ class Grid {
          return dimensions[index];
       }
       /**
-       * @param[in] indicies - A dimension index pack
+       * @param[in] indices - A dimension index pack
        */
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
                 typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indicies) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{indicies...};
+      Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{indices...};
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
             result[i] = this -> getDimension(result[i]);
@@ -166,8 +166,8 @@ class Grid {
       template <typename... DimensionIndex,
                 typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
                 typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      Container<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indicies) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{indicies...};
+      Container<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{indices...};
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
             result[i] = this -> getEntitiesCount(result[i]);
@@ -189,8 +189,8 @@ class Grid {
       template <typename... DimensionIndex,
          typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
          typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-         Container<sizeof...(DimensionIndex), Index> getEndIndicies(DimensionIndex... indicies) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{ indicies... };
+         Container<sizeof...(DimensionIndex), Index> getEndindices(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{ indices... };
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
             result[i] = this->getEndIndex(result[i]);
-- 
GitLab


From d5f6b2fa42a43afd3a228b04142672cf00161845 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Wed, 17 Nov 2021 17:09:06 +0100
Subject: [PATCH 17/29] Add traversers to Grid1D.

---
 .../HeatEquationGrid/CMakeLists.txt           |  3 +-
 .../HeatmapGrid/CMakeLists.txt                |  5 -
 .../HeatmapNDimGrid/CMakeLists.txt            |  5 +
 .../{HeatmapGrid => HeatmapNDimGrid}/main.cpp |  0
 .../{HeatmapGrid => HeatmapNDimGrid}/main.cu  |  0
 .../{HeatmapGrid => HeatmapNDimGrid}/main.h   |  0
 .../HeatmapTNLGrid/CMakeLists.txt             |  5 +
 .../HeatEquationGrid/HeatmapTNLGrid/main.cpp  |  2 +
 .../HeatEquationGrid/HeatmapTNLGrid/main.cu   |  2 +
 .../HeatEquationGrid/HeatmapTNLGrid/main.h    | 32 +++++++
 src/TNL/Meshes/GridDetails/Grid1D.h           | 20 +++-
 src/TNL/Meshes/GridDetails/Grid1D_impl.h      | 94 +++++++++++++++++++
 12 files changed, 161 insertions(+), 7 deletions(-)
 delete mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
 rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.cpp (100%)
 rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.cu (100%)
 rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.h (100%)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h

diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
index 9adc7520d..de5592b60 100644
--- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt
@@ -1,2 +1,3 @@
 ADD_SUBDIRECTORY( HeatmapParallelFor )
-ADD_SUBDIRECTORY( HeatmapGrid )
+ADD_SUBDIRECTORY( HeatmapNDimGrid )
+ADD_SUBDIRECTORY( HeatmapTNLGrid )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
deleted file mode 100644
index 6649d0fb2..000000000
--- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu )
-ELSE()
-   add_executable( benchmark_heat_grid main.h main.cpp )
-ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
new file mode 100644
index 000000000..a17375117
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cu )
+ELSE()
+   add_executable( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
new file mode 100644
index 000000000..5672634e3
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( benchmark_heat_tnl_grid main.h main.cu )
+ELSE()
+   add_executable( benchmark_heat_tnl_grid main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
new file mode 100644
index 000000000..bb54c4982
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
@@ -0,0 +1,32 @@
+
+#pragma once
+
+#include "TNL/Meshes/GridDetails/Grid1D.h"
+
+using Grid = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>;
+
+int main(int argc, char *argv[]) {
+   Grid grid;
+
+   grid.setDimensions({ 100 });
+
+   auto fn0 = [=] __cuda_callable__(const Grid::EntityType<0>&entity, int variant) mutable {
+      printf("%d %d\n", variant, entity.getCoordinates().x());
+   };
+
+   auto fn1 = [=] __cuda_callable__(const Grid::EntityType<1>& entity, int variant) mutable {
+      printf("%d %d\n", variant, entity.getCoordinates().x());
+   };
+
+   grid.forAll<0>(fn0, 0);//, "All 0 dim:");
+   grid.forInterior<0>(fn0, 1);//, "Interior 0 dim:");
+   grid.forBoundary<0>(fn0, 2);//, "Boundary 0 dim:");
+
+
+   grid.forAll<1>(fn1, 3);//, "All 1 dim:");
+   grid.forInterior<1>(fn1, 4);//, "Interior 1 dim:");
+   grid.forBoundary<1>(fn1, 5);//, "Boundary 1 dim:");
+
+   return 0;
+}
+
diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h
index 64f783a17..e7e31326b 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D.h
@@ -17,6 +17,7 @@
 #include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 #include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/GridEntityConfig.h>
+#include <TNL/Algorithms/ParallelFor.h>
 
 namespace TNL {
 namespace Meshes {
@@ -176,11 +177,28 @@ public:
    const RealType& getSpaceStepsProducts() const;
 
    /**
-    * \breif Returns the measure (length) of a cell in this grid.
+    * \brief Returns the measure (length) of a cell in this grid.
     */
    __cuda_callable__
    inline const RealType& getCellMeasure() const;
 
+   /**
+    * \brief Traverses all elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forAll(Func func, FuncArgs... args) const;
+
+   /**
+    * \brief Traversers boundary elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forBoundary(Func func, FuncArgs... args) const;
+
+   /**
+    * \brief Traversers interior elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forInterior(Func func, FuncArgs... args) const;
    /**
     * \brief Returns the smallest length of step out of all coordinates (axes).
     */
diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
index 0ad33b103..8d73408fa 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
@@ -336,6 +336,100 @@ getCellMeasure() const
    return this->template getSpaceStepsProducts< 1 >();
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<1, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1");
+
+   auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates(i);
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x() + 1, outer, *this, args...);
+      break;
+   case 1:
+      TNL::Algorithms::ParallelFor<Device>::exec(localBegin.x(), localEnd.x(), outer, *this, args...);
+      break;
+   default: break;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1");
+
+   auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates(i);
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      outer(0, *this, args...);
+      outer(getDimensions().x(), *this, args...);
+      break;
+   case 1:
+      if (localBegin < interiorBegin && interiorEnd < localEnd) {
+         outer(interiorBegin.x() - 1, *this, args...);
+         outer(interiorEnd.x(), *this, args...);
+         break;
+      }
+
+      if (localBegin < interiorBegin) {
+         outer(interiorBegin.x() - 1, *this, args...);
+         break;
+      }
+
+      if (interiorEnd < localEnd)
+         outer(interiorEnd.x(), *this, args...);
+      break;
+   default: break;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<1, Real, Device, Index>::forInterior(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1");
+
+   auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates(i);
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      TNL::Algorithms::ParallelFor<Device>::exec(1, dimensions.x(), outer, *this, args...);
+      break;
+   case 1:
+      TNL::Algorithms::ParallelFor<Device>::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...);
+      break;
+   default: break;
+   }
+}
+
 template< typename Real,
           typename Device,
           typename Index >
-- 
GitLab


From 0dbc6599a28b40b719cf2807da18c3a888f868a5 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 18 Nov 2021 14:35:11 +0100
Subject: [PATCH 18/29] Implement 2D grid traversal methods

---
 .../HeatEquationGrid/HeatmapTNLGrid/main.h    |  57 ++++-
 src/TNL/Meshes/GridDetails/Grid2D.h           |  18 ++
 src/TNL/Meshes/GridDetails/Grid2D_impl.h      | 217 ++++++++++++++++++
 3 files changed, 280 insertions(+), 12 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
index bb54c4982..45154f05c 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
@@ -3,30 +3,63 @@
 
 #include "TNL/Meshes/GridDetails/Grid1D.h"
 
-using Grid = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>;
+using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>;
+using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>;
 
-int main(int argc, char *argv[]) {
-   Grid grid;
+void check1D() {
+   Grid1D grid;
 
    grid.setDimensions({ 100 });
 
-   auto fn0 = [=] __cuda_callable__(const Grid::EntityType<0>&entity, int variant) mutable {
+   auto fn0 = [=] __cuda_callable__(const Grid1D::EntityType<0>&entity, int variant) mutable {
       printf("%d %d\n", variant, entity.getCoordinates().x());
-   };
+      };
 
-   auto fn1 = [=] __cuda_callable__(const Grid::EntityType<1>& entity, int variant) mutable {
+   auto fn1 = [=] __cuda_callable__(const Grid1D::EntityType<1>&entity, int variant) mutable {
       printf("%d %d\n", variant, entity.getCoordinates().x());
    };
 
-   grid.forAll<0>(fn0, 0);//, "All 0 dim:");
-   grid.forInterior<0>(fn0, 1);//, "Interior 0 dim:");
-   grid.forBoundary<0>(fn0, 2);//, "Boundary 0 dim:");
+   grid.forAll<0>(fn0, 0);
+   grid.forInterior<0>(fn0, 1);
+   grid.forBoundary<0>(fn0, 2);
+
+   grid.forAll<1>(fn1, 3);
+   grid.forInterior<1>(fn1, 4);
+   grid.forBoundary<1>(fn1, 5);
+}
+
+void check2D() {
+   Grid2D grid;
+
+   grid.setDimensions({ 10, 10 });
 
+   auto fn0 = [=] __cuda_callable__(const Grid2D::EntityType<0>&entity, int variant) mutable {
+      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
+   };
 
-   grid.forAll<1>(fn1, 3);//, "All 1 dim:");
-   grid.forInterior<1>(fn1, 4);//, "Interior 1 dim:");
-   grid.forBoundary<1>(fn1, 5);//, "Boundary 1 dim:");
+   auto fn1 = [=] __cuda_callable__(const Grid2D::EntityType<1>&entity, int variant) mutable {
+      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
+   };
 
+   auto fn2 = [=] __cuda_callable__(const Grid2D::EntityType<2>&entity, int variant) mutable {
+      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
+   };
+
+   grid.forAll<0>(fn0, 0);
+   grid.forInterior<0>(fn0, 1);
+   grid.forBoundary<0>(fn0, 2);
+
+   grid.forAll<1>(fn1, 3);
+   grid.forInterior<1>(fn1, 4);
+   grid.forBoundary<1>(fn1, 5);
+
+   grid.forAll<2>(fn2, 6);
+   grid.forInterior<2>(fn2, 7);
+   grid.forBoundary<2>(fn2, 8);
+}
+
+int main(int argc, char *argv[]) {
+   check2D();
    return 0;
 }
 
diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h
index d11487b5d..3f9f6e9dd 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D.h
@@ -186,6 +186,24 @@ class Grid< 2, Real, Device, Index >
    __cuda_callable__
    inline RealType getSmallestSpaceStep() const;
 
+   /**
+    * \brief Traverses all elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forAll(Func func, FuncArgs... args) const;
+
+   /**
+    * \brief Traversers interior elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forInterior(Func func, FuncArgs... args) const;
+
+   /**
+    * \brief Traversers boundary elements
+    */
+   template<int EntityDimension, typename Func, typename... FuncArgs>
+   void forBoundary(Func func, FuncArgs... args) const;
+
    void writeProlog( Logger& logger ) const;
 
    protected:
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index fc290d23d..4b5d80af7 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -414,6 +414,223 @@ Real Grid< 2, Real, Device, Index > :: getSmallestSpaceStep() const
    return min( this->spaceSteps.x(), this->spaceSteps.y() );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]");
+
+   auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates({ i, j });
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...);
+      break;
+   case 1: {
+      auto outerOriented = [=] __cuda_callable__(Index i, Index j,
+                                                 const Grid<2, Real, Device, Index>&grid,
+                                                 const CoordinatesType & orientation,
+                                                 const CoordinatesType & basis,
+                                                 FuncArgs... args) mutable {
+         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+
+         entity.refresh();
+
+         func(entity, args...);
+      };
+
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
+                                                   dimensions.x() + 1, dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(1, 0),
+                                                   CoordinatesType(0, 1),
+                                                   args...);
+
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
+                                                   dimensions.x(), dimensions.y() + 1,
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(0, 1),
+                                                   CoordinatesType(1, 0),
+                                                   args...);
+      break;
+   }
+   case 2:
+      TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...);
+      break;
+   default: break;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]");
+
+   auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates({ i, j });
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, dimensions.x(), dimensions.y(), outer, *this, args...);
+      break;
+   case 1: {
+      auto outerOriented = [=] __cuda_callable__(Index i, Index j,
+                                                 const Grid<2, Real, Device, Index>&grid,
+                                                 const CoordinatesType& orientation,
+                                                 const CoordinatesType& basis,
+                                                 FuncArgs... args) mutable {
+         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+
+         entity.refresh();
+
+         func(entity, args...);
+      };
+
+      TNL::Algorithms::ParallelFor2D<Device>::exec(1, 0,
+                                                   dimensions.x(), dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(1, 0),
+                                                   CoordinatesType(0, 1),
+                                                   args...);
+
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 1,
+                                                   dimensions.x(), dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(0, 1),
+                                                   CoordinatesType(1, 0),
+                                                   args...);
+      break;
+   }
+   case 2:
+      TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...);
+      break;
+   default:
+      break;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template<int EntityDimension, typename Func, typename... FuncArgs>
+void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const {
+   static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]");
+
+   auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable {
+      EntityType<EntityDimension> entity(grid);
+
+      entity.setCoordinates({ i, j });
+      entity.refresh();
+
+      func(entity, args...);
+   };
+
+   switch (EntityDimension) {
+   case 0:
+      // Lower horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, dimensions.x() + 1, 1, outer, *this, args...);
+      // Upper horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, dimensions.y(), dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...);
+      // Left vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, 1, dimensions.y() + 1, outer, *this, args...);
+      // Right vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(dimensions.x(), 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...);
+      break;
+   case 1: {
+      auto outerOriented = [=] __cuda_callable__(Index i, Index j,
+                                                 const Grid<2, Real, Device, Index>&grid,
+                                                 const CoordinatesType & orientation,
+                                                 const CoordinatesType & basis,
+                                                 FuncArgs... args) mutable {
+         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+
+         entity.refresh();
+
+         func(entity, args...);
+      };
+
+      // Lower horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
+                                                   dimensions.x() + 1, 1,
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(1, 0),
+                                                   CoordinatesType(0, 1),
+                                                   args...);
+      // Upper horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, dimensions.y() - 1,
+                                                   dimensions.x() + 1, dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(1, 0),
+                                                   CoordinatesType(0, 1),
+                                                   args...);
+      // Left vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
+                                                   1, dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(0, 1),
+                                                   CoordinatesType(1, 0),
+                                                   args...);
+      // Right vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(dimensions.x() - 1, 0,
+                                                   dimensions.x(), dimensions.y(),
+                                                   outerOriented,
+                                                   *this,
+                                                   CoordinatesType(0, 1),
+                                                   CoordinatesType(1, 0),
+                                                   args...);
+      break;
+   }
+   case 2:
+      if (localBegin < interiorBegin && interiorEnd < localEnd) {
+         // Lower horizontal
+         TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorBegin.y() + 1, outer, *this, args...);
+         // Upper horizontal
+         TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x() - 1, interiorEnd.y() - 1, interiorEnd.x() + 1, interiorEnd.y() + 1, outer, *this, args...);
+         // Left vertical
+         TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x() -1, interiorBegin.y() - 1, interiorBegin.x() + 1, interiorEnd.y() + 1, outer, *this, args...);
+         // Right vertical
+         TNL::Algorithms::ParallelFor2D<Device>::exec(interiorEnd.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorEnd.y() + 1, outer, *this, args...);
+         return;
+      }
+
+      // Lower horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), localBegin.y(), localEnd.x(), interiorBegin.y(), outer, *this, args...);
+      // Upper horizontal
+      TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), interiorEnd.y(), localEnd.x(), localEnd.y(), outer, *this, args...);
+      // Left vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), interiorBegin.y(), interiorBegin.x(), interiorEnd.y(), outer, *this, args...);
+      // Right vertical
+      TNL::Algorithms::ParallelFor2D<Device>::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...);
+
+      break;
+   default:
+      break;
+   }
+}
+
 template< typename Real,
           typename Device,
           typename Index >
-- 
GitLab


From 8b76f89003553bcd77c35dcd989b515da9aee175 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 25 Nov 2021 14:16:11 +0100
Subject: [PATCH 19/29] Fix indexing of the NDim grid

---
 .../HeatEquationGrid/HeatmapNDimGrid/main.h   | 182 ++++++++++--------
 1 file changed, 107 insertions(+), 75 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
index 40d7bef31..fbd536f8f 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
@@ -33,59 +33,20 @@ template<int Dimension,
          typename = std::enable_if_t<std::is_integral<Index>::value>>
 class GridEntity {
    public:
-      struct Context {
-         public:
-            const int startOffset;
-
-            const Container<Dimension, Index> startPosition;
-            const Container<Dimension, Index> traverseRectDimensions;
-            const Container<Dimension, bool> direction;
-
-            Context(const int startOffset,
-                    const Container<Dimension, Index> startPosition,
-                    const Container<Dimension, Index> traverseRectDimensions,
-                    const Container<Dimension, bool> direction):
-                    startOffset(startOffset),
-                    startPosition(startPosition),
-                    traverseRectDimensions(traverseRectDimensions),
-                    direction(direction) {}
-      };
-
       __cuda_callable__
-      inline explicit GridEntity(const Context& context,
-                                 const Index& vertexIndex) :
+      inline explicit GridEntity(const Index& vertexIndex,
+                                 const Container<Dimension, Index>& coordinates,
+                                 const Container<Dimension, bool>& direction) :
                                  vertexIndex(vertexIndex),
-                                 context(context) {
-        this -> position = -1;
-      };
+                                 coordinates(coordinates),
+                                 direction(direction) {};
 
       __cuda_callable__
       ~GridEntity() {};
 
       __cuda_callable__
-      inline Container<Dimension, Index> getPosition() noexcept {
-         if (position[0] == -1) {
-            Container<Dimension, Index> position = 0;
-            Index tmpOffset = vertexIndex - context.startOffset;
-
-            Index dim = Dimension - 1;
-
-            while (tmpOffset) {
-               Index newIndex = position[dim] + tmpOffset, dimension = context.traverseRectDimensions[dim];
-               Index quotient = newIndex / dimension;
-               Index reminder = newIndex - (dimension * quotient);
-
-               position[dim] = reminder;
-               tmpOffset = quotient;
-
-               dim -= 1;
-            }
-
-            for (Index i = 0; i < Dimension; i++)
-               this -> position[i] = position[i] + context.startPosition[i];
-         }
-
-         return position;
+      inline Container<Dimension, Index> getCoordinates() const noexcept {
+         return coordinates;
       }
 
       __cuda_callable__
@@ -93,10 +54,9 @@ class GridEntity {
          return vertexIndex;
       }
    private:
-      Container<Dimension, Index> position;
-
       const Index vertexIndex;
-      const Context& context;
+      const Container<Dimension, Index> coordinates;
+      const Container<Dimension, bool> direction;
 };
 
 template<int Dimension,
@@ -189,7 +149,7 @@ class Grid {
       template <typename... DimensionIndex,
          typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
          typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-         Container<sizeof...(DimensionIndex), Index> getEndindices(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> getEndIndices(DimensionIndex... indices) const noexcept {
          Container<sizeof...(DimensionIndex), Index> result{ indices... };
 
          for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
@@ -227,25 +187,31 @@ class Grid {
                     const TNL::Containers::Array<Container<Dimension, bool>, Device>& directions,
                     Function function,
                     FunctionArgs... args) const noexcept {
-         Index verticesCount = 1, offset = 0;
-         Container<Dimension, Index> traverseRectDimensions, offsetPosition, multipliers;
+         Index verticesCount = 1;
+         Container<Dimension, Index> traverseRectOrigin, traverseRectDimensions,
+                                     dimensionsProducts, traverseRectDimensionsProducts;
 
          for (Index i = 0; i < Dimension; i++) {
             traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1;
             verticesCount *= traverseRectDimensions[i];
-            offsetPosition[i] = std::min(firstVertex[i], secondVertex[i]);
-
-            multipliers[i] = i == 0 ? 1 : multipliers[i - 1] * dimensions[i];
-            offset += offsetPosition[i] * multipliers[i];
+            traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]);
 
             TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range");
             TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range");
          }
 
-         using Context = typename GridEntity<Dimension, Index>::Context;
+         dimensionsProducts = getDimensionProducts(dimensions);
+         traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions);
 
-         auto outerFunction = [function] __cuda_callable__(Index offset, const Context& context, FunctionArgs... args) mutable {
-            GridEntity<Dimension, Index> entity{ context, context.startOffset + offset };
+         auto outerFunction = [=] __cuda_callable__(Index offset,
+                                                    const Container<Dimension, Index>& traverseRectOrigin,
+                                                    const Container<Dimension, Index>& traverseRectDimensions,
+                                                    const Container<Dimension, Index>& traverseRectDimensionsProducts,
+                                                    const Container<Dimension, Index>& dimensionsProducts,
+                                                    FunctionArgs... args) mutable {
+            auto entity = this -> makeEntitity(offset,
+                                               traverseRectOrigin, traverseRectDimensions,
+                                               traverseRectDimensionsProducts, dimensionsProducts);
 
             function(entity, args...);
          };
@@ -253,9 +219,12 @@ class Grid {
          Index lowerBound = 0, upperBound = verticesCount;
 
          for (Index i = 0; i < directions.getSize(); i++) {
-            const Context context{ offset, offsetPosition, traverseRectDimensions, directions[i] };
-
-            TNL::Algorithms::ParallelFor<Device>::exec(lowerBound, upperBound, outerFunction, context, args...);
+            TNL::Algorithms::ParallelFor<Device>::exec(lowerBound, upperBound, outerFunction,
+                                                       traverseRectOrigin,
+                                                       traverseRectDimensions,
+                                                       traverseRectDimensionsProducts,
+                                                       dimensionsProducts,
+                                                       args...);
          }
       }
    private:
@@ -308,6 +277,73 @@ class Grid {
             } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end()));
          }
       }
+
+      __cuda_callable__ inline
+      GridEntity<Dimension, Index> makeEntitity(const Index index,
+                                                const Container<Dimension, Index>& traverseRectOrigin,
+                                                const Container<Dimension, Index>& traverseRectDimensions,
+                                                const Container<Dimension, Index>& traverseRectDimensionsProducts,
+                                                const Container<Dimension, Index>& dimensionsProducts) const {
+         const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions);
+         Container<Dimension, Index> globalCoordinates = 0;
+
+         for (Index i = 0; i < Dimension; i++)
+            globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i];
+
+         const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts);
+
+         return GridEntity<Dimension, Index>(globalIndex, globalCoordinates, {});
+      }
+      /**
+       * Calculates position in the specific boundaries
+       */
+      __cuda_callable__ inline
+      Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const {
+         Container<Dimension, Index> coordinates = 0;
+         Index tmpIndex = index;
+
+         Index dim = Dimension - 1;
+
+         // TODO: -  Implement overflow check.
+         while (tmpIndex) {
+            Index dimension = dimensions[dim],
+                  quotient = tmpIndex / dimension,
+                  reminder = tmpIndex - (dimension * quotient);
+
+            coordinates[dim] = reminder;
+            tmpIndex = quotient;
+
+            dim -= 1;
+         }
+
+         return coordinates;
+      }
+      /**
+       * Calculates product matrix based on the dimension
+       */
+      __cuda_callable__ inline
+      Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept {
+         Container<Dimension, Index> products = 0;
+
+         for (Index i = Dimension; i > 0; i--)
+            products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i];
+
+         return products;
+      }
+      /**
+       * Calculates index based on the dimension
+       */
+      __cuda_callable__ inline
+      Index getIndex(const Container<Dimension, Index>& coordinates,
+                     const Container<Dimension, Index>& dimensionProducts) const {
+         Index index = 0;
+
+         for (Index i = 0; i < Dimension; i++)
+            index += coordinates[i] * dimensionProducts[i];
+
+         return index;
+      }
+
 };
 
 template <typename Real>
@@ -335,7 +371,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    aux = 0;
 
    auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable {
-      auto position = entity.getPosition();
+      auto position = entity.getCoordinates();
       auto index = entity.getIndex();
 
       auto x = position[0] * hx - params.xDomainSize / 2;
@@ -383,6 +419,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    return writeGNUPlot("data_final.txt", params, ux);
 };
 
+
 int main(int argc, char* argv[]) {
    using Real = double;
 
@@ -408,25 +445,20 @@ int main(int argc, char* argv[]) {
    return EXIT_SUCCESS;
 }
 
+
 /*
 int main(int argc, char *argv[]) {
-   Grid<3, int, TNL::Devices::Host> grid;
-
-   grid.setDimensions(3, 3, 3);
-
-   auto fn = [=] __cuda_callable__ (int index) {
-      //printf("%d\n", index);
-   };
+   Grid<2, int, TNL::Devices::Cuda> grid;
 
-   grid.traverseAll(fn);
+   grid.setDimensions(5, 5);
 
-   auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) {
-      printf("%d %d %d \n", entity.getPosition()[0], entity.getPosition()[1], entity.getPosition()[2]);
+   auto fn_entity = [=] __cuda_callable__ (GridEntity<2, int> entity) {
+      printf("%d %d %d\n", entity.getCoordinates()[0], entity.getCoordinates()[1], entity.getIndex());
    };
 
-   Container<3, int> direction { 0, 0, 0 };
+   Container<2, int> direction { 0, 0 };
 
-   grid.traverse({ 1, 1, 1 }, { 1, 1, 1 }, { direction }, fn_entity);
+   grid.traverse({ 1, 1 }, { 4, 4 }, { direction }, fn_entity);
 
    return 0;
 }
-- 
GitLab


From c77d6ff52d75467e1b3df76245b8bf66fa66bdc5 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 25 Nov 2021 14:21:57 +0100
Subject: [PATCH 20/29] Remove distributed grid indexing from 1D grid

---
 src/TNL/Meshes/GridDetails/Grid1D_impl.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
index 8d73408fa..03886faf7 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
@@ -357,7 +357,10 @@ void Grid<1, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
       TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x() + 1, outer, *this, args...);
       break;
    case 1:
-      TNL::Algorithms::ParallelFor<Device>::exec(localBegin.x(), localEnd.x(), outer, *this, args...);
+      //TODO: - Update for distributed grid
+      //TNL::Algorithms::ParallelFor<Device>::exec(localBegin.x(), localEnd.x(), outer, *this, args...);
+
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x(), outer, *this, args...);
       break;
    default: break;
    }
@@ -381,11 +384,16 @@ void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
 
    switch (EntityDimension) {
    case 0:
-      outer(0, *this, args...);
-      outer(getDimensions().x(), *this, args...);
+      // TODO: - Implement call within a single kernel
+      TNL::Algorithms::ParallelFor<Device>::exec(0, 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(getDimensions().x(), getDimensions().x() + 1, outer, *this, args...);
       break;
    case 1:
-      if (localBegin < interiorBegin && interiorEnd < localEnd) {
+      TNL::Algorithms::ParallelFor<Device>::exec(0, 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(getDimensions().x() - 1, getDimensions().x(), outer, *this, args...);
+
+      // TODO: - Verify for distributed grid
+      /*if (localBegin < interiorBegin && interiorEnd < localEnd) {
          outer(interiorBegin.x() - 1, *this, args...);
          outer(interiorEnd.x(), *this, args...);
          break;
@@ -397,7 +405,7 @@ void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
       }
 
       if (interiorEnd < localEnd)
-         outer(interiorEnd.x(), *this, args...);
+         outer(interiorEnd.x(), *this, args...);*/
       break;
    default: break;
    }
@@ -424,7 +432,10 @@ void Grid<1, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
       TNL::Algorithms::ParallelFor<Device>::exec(1, dimensions.x(), outer, *this, args...);
       break;
    case 1:
-      TNL::Algorithms::ParallelFor<Device>::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(1, dimensions.x() - 1, outer, *this, args...);
+
+      // TODO: - Verify for distributed grids
+      //TNL::Algorithms::ParallelFor<Device>::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...);
       break;
    default: break;
    }
-- 
GitLab


From 0829a319f81da5d190d9512a2ebcaf3a07cf104e Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 25 Nov 2021 14:22:59 +0100
Subject: [PATCH 21/29] Remove neighbour grid entities/get neighbours storage

---
 src/TNL/Meshes/GridDetails/GridEntity_impl.h | 85 ++------------------
 src/TNL/Meshes/GridEntity.h                  | 27 -------
 2 files changed, 5 insertions(+), 107 deletions(-)

diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
index fb5548f35..d91b5f449 100644
--- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
@@ -19,17 +19,6 @@
 namespace TNL {
 namespace Meshes {
 
-/*template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,          typename Config,
-          int EntityDimension >
-__cuda_callable__ inline
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension >::
-GridEntity()
-{
-}*/
-
 template< int Dimension,
           typename Real,
           typename Device,
@@ -43,8 +32,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid )
   entityIndex( -1 ),
   coordinates( 0 ),
   orientation( 0 ),
-  basis( 0 ),
-  neighborEntitiesStorage( *this )
+  basis( 0 )
 {
 }
 
@@ -64,8 +52,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid,
   entityIndex( -1 ),
   coordinates( coordinates ),
   orientation( orientation ),
-  basis( basis ),
-  neighborEntitiesStorage( *this )
+  basis( basis )
 {
 }
 
@@ -123,7 +110,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con
 refresh()
 {
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 }
 
 template< int Dimension,
@@ -205,21 +191,6 @@ setBasis( const EntityBasisType& basis )
    this->orientation = EntityOrientationType( 1 ) - abs( basis );
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          int EntityDimension,
-          typename Config >
-   template< int NeighborEntityDimension >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::template NeighborEntities< NeighborEntityDimension >&
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::
-getNeighborEntities() const
-{
-   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
-}
-
 template< int Dimension,
           typename Real,
           typename Device,
@@ -276,19 +247,6 @@ getMesh() const
    return this->grid;
 }
 
-/****
- * Specialization for cells
- */
-/*template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__ inline
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension >::
-GridEntity()
-{
-}*/
-
 template< int Dimension,
           typename Real,
           typename Device,
@@ -298,8 +256,7 @@ __cuda_callable__ inline
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
 GridEntity( const GridType& grid )
 : grid( grid ),
-  entityIndex( -1 ),
-  neighborEntitiesStorage( *this )
+  entityIndex( -1 )
 {
    this->coordinates = CoordinatesType( ( Index ) 0 );
 }
@@ -317,8 +274,7 @@ GridEntity( const GridType& grid,
                const EntityBasisType& basis )
 : grid( grid ),
   entityIndex( -1 ),
-  coordinates( coordinates ),
-  neighborEntitiesStorage( *this )
+  coordinates( coordinates )
 {
 }
 
@@ -372,7 +328,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:
 refresh()
 {
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 }
 
 template< int Dimension,
@@ -419,20 +374,6 @@ getBasis() const
    return EntityBasisType( ( IndexType ) 1 );
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          typename Config >
-   template< int NeighborEntityDimension >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::template NeighborEntities< NeighborEntityDimension >&
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
-getNeighborEntities() const
-{
-   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
-}
-
 template< int Dimension,
           typename Real,
           typename Device,
@@ -531,8 +472,7 @@ GridEntity( const GridType& grid,
                const EntityBasisType& basis )
 : grid( grid ),
   entityIndex( -1 ),
-  coordinates( coordinates ),
-  neighborEntitiesStorage( *this )
+  coordinates( coordinates )
 {
 }
 
@@ -586,7 +526,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
 refresh()
 {
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 }
 
 template< int Dimension,
@@ -635,20 +574,6 @@ getBasis() const
    return EntityBasisType( ( IndexType ) 0 );
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          typename Config >
-   template< int NeighborEntityDimension >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::template NeighborEntities< NeighborEntityDimension >&
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
-getNeighborEntities() const
-{
-   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
-}
-
 template< int Dimension,
           typename Real,
           typename Device,
diff --git a/src/TNL/Meshes/GridEntity.h b/src/TNL/Meshes/GridEntity.h
index 573089dc3..19e13c531 100644
--- a/src/TNL/Meshes/GridEntity.h
+++ b/src/TNL/Meshes/GridEntity.h
@@ -60,8 +60,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
       typedef typename GridType::PointType PointType;
 
-      typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType;
-
       template< int NeighborEntityDimension = getEntityDimension() >
       using NeighborEntities =
          NeighborGridEntityGetter<
@@ -112,11 +110,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       __cuda_callable__ inline
       void setBasis( const EntityBasisType& basis );
 
-      template< int NeighborEntityDimension = getEntityDimension() >
-      __cuda_callable__ inline
-      const NeighborEntities< NeighborEntityDimension >&
-      getNeighborEntities() const;
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
@@ -141,8 +134,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
 
       EntityBasisType basis;
 
-      NeighborGridEntitiesStorageType neighborEntitiesStorage;
-
       //__cuda_callable__ inline
       //GridEntity();
 
@@ -175,10 +166,8 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
 
       constexpr static int getEntityDimension() { return getMeshDimension(); };
 
-
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
-      typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType;
 
       template< int NeighborEntityDimension = getEntityDimension() >
       using NeighborEntities =
@@ -231,11 +220,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       __cuda_callable__ inline
       void setBasis( const EntityBasisType& basis ){};
 
-      template< int NeighborEntityDimension = Dimension >
-      __cuda_callable__ inline
-      const NeighborEntities< NeighborEntityDimension >&
-      getNeighborEntities() const;
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
@@ -259,8 +243,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
 
       CoordinatesType coordinates;
 
-      NeighborGridEntitiesStorageType neighborEntitiesStorage;
-
       //__cuda_callable__ inline
       //GridEntity();
 
@@ -295,7 +277,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
 
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
-      typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType;
 
       template< int NeighborEntityDimension = getEntityDimension() >
       using NeighborEntities =
@@ -348,12 +329,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       __cuda_callable__ inline
       void setBasis( const EntityBasisType& basis ){};
 
-
-      template< int NeighborEntityDimension = getEntityDimension() >
-      __cuda_callable__ inline
-      const NeighborEntities< NeighborEntityDimension >&
-      getNeighborEntities() const;
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
@@ -381,8 +356,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
 
       CoordinatesType coordinates;
 
-      NeighborGridEntitiesStorageType neighborEntitiesStorage;
-
       friend class BoundaryGridEntityChecker< GridEntity >;
 
       friend class GridEntityCenterGetter< GridEntity >;
-- 
GitLab


From 7d87876180c14769353058afbabb789b6d5b5c2a Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 25 Nov 2021 23:18:41 +0100
Subject: [PATCH 22/29] Update Grid2D to efficiently process boundary entities

---
 src/TNL/Meshes/GridDetails/Grid2D.h          |   4 +-
 src/TNL/Meshes/GridDetails/Grid2D_impl.h     | 129 ++++++++++++-------
 src/TNL/Meshes/GridDetails/GridEntity_impl.h |   4 +-
 3 files changed, 90 insertions(+), 47 deletions(-)

diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h
index 3f9f6e9dd..18a21c0ea 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D.h
@@ -124,7 +124,7 @@ class Grid< 2, Real, Device, Index >
     */
    template< int EntityDimension >
    __cuda_callable__
-   IndexType getEntitiesCount() const;
+   Index getEntitiesCount() const;
 
    /**
     * \brief Gets number of entities in this grid.
@@ -132,7 +132,7 @@ class Grid< 2, Real, Device, Index >
     */
    template< typename Entity >
    __cuda_callable__
-   inline IndexType getEntitiesCount() const;
+   inline Index getEntitiesCount() const;
 
    /**
     * \brief See Grid1D::getEntity().
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index 4b5d80af7..65da95107 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -465,7 +465,10 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
       break;
    }
    case 2:
-      TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...);
+      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, dimensions.x(), dimensions.y(), outer, *this, args...);
+
+      // TODO: - Verify for distributed grids
+      //TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...);
       break;
    default: break;
    }
@@ -493,7 +496,7 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
       break;
    case 1: {
       auto outerOriented = [=] __cuda_callable__(Index i, Index j,
-                                                 const Grid<2, Real, Device, Index>&grid,
+                                                 Grid<2, Real, Device, Index>& grid,
                                                  const CoordinatesType& orientation,
                                                  const CoordinatesType& basis,
                                                  FuncArgs... args) mutable {
@@ -522,7 +525,10 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
       break;
    }
    case 2:
-      TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...);
+      TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, dimensions.x() - 1, dimensions.y() - 1, outer, *this, args...);
+
+      // TODO: - Verify for distributed grids
+      //TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...);
       break;
    default:
       break;
@@ -536,10 +542,19 @@ template<int EntityDimension, typename Func, typename... FuncArgs>
 void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const {
    static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]");
 
-   auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable {
+   auto outer = [=] __cuda_callable__(Index i, Index axis, Index axisIndex, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable {
       EntityType<EntityDimension> entity(grid);
 
-      entity.setCoordinates({ i, j });
+      switch (axis) {
+      case 0:
+         entity.setCoordinates({ axisIndex, i });
+         break;
+      case 1:
+         entity.setCoordinates({ i, axisIndex });
+         break;
+      default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]");
+      }
+
       entity.refresh();
 
       func(entity, args...);
@@ -548,21 +563,37 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
    switch (EntityDimension) {
    case 0:
       // Lower horizontal
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, dimensions.x() + 1, 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x() + 1, outer, 0, 0, *this, args...);
       // Upper horizontal
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, dimensions.y(), dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x() + 1, outer, 0, dimensions.y(), *this, args...);
       // Left vertical
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0, 1, dimensions.y() + 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.y() + 1, outer, 1, 0, *this, args...);
       // Right vertical
-      TNL::Algorithms::ParallelFor2D<Device>::exec(dimensions.x(), 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.y() + 1, outer, 1, dimensions.x(), *this, args...);
       break;
    case 1: {
-      auto outerOriented = [=] __cuda_callable__(Index i, Index j,
-                                                 const Grid<2, Real, Device, Index>&grid,
+      auto outerOriented = [=] __cuda_callable__(Index i,
+                                                 Index axis,
+                                                 Index axisIndex,
+                                                 const Grid<2, Real, Device, Index>& grid,
                                                  const CoordinatesType & orientation,
                                                  const CoordinatesType & basis,
                                                  FuncArgs... args) mutable {
-         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+         CoordinatesType coordinates;
+
+         switch (axis) {
+         case 0:
+            coordinates[0] = axisIndex;
+            coordinates[1] = i;
+            break;
+         case 1:
+            coordinates[0] = i;
+            coordinates[1] = axisIndex;
+            break;
+         default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]");
+         }
+
+         EntityType<EntityDimension> entity(grid, coordinates, orientation, basis);
 
          entity.refresh();
 
@@ -570,41 +601,54 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
       };
 
       // Lower horizontal
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
-                                                   dimensions.x() + 1, 1,
-                                                   outerOriented,
-                                                   *this,
-                                                   CoordinatesType(1, 0),
-                                                   CoordinatesType(0, 1),
-                                                   args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0,
+                                                 dimensions.x(),
+                                                 outerOriented,
+                                                 0, 0,
+                                                 *this,
+                                                 CoordinatesType(1, 0),
+                                                 CoordinatesType(0, 1),
+                                                 args...);
       // Upper horizontal
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, dimensions.y() - 1,
-                                                   dimensions.x() + 1, dimensions.y(),
-                                                   outerOriented,
-                                                   *this,
-                                                   CoordinatesType(1, 0),
-                                                   CoordinatesType(0, 1),
-                                                   args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0,
+                                                 dimensions.x(),
+                                                 outerOriented,
+                                                 0, dimensions.y(),
+                                                 *this,
+                                                 CoordinatesType(1, 0),
+                                                 CoordinatesType(0, 1),
+                                                 args...);
       // Left vertical
-      TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
-                                                   1, dimensions.y(),
-                                                   outerOriented,
-                                                   *this,
-                                                   CoordinatesType(0, 1),
-                                                   CoordinatesType(1, 0),
-                                                   args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0,
+                                                 dimensions.y(),
+                                                 outerOriented,
+                                                 1, 0,
+                                                 *this,
+                                                 CoordinatesType(0, 1),
+                                                 CoordinatesType(1, 0),
+                                                 args...);
       // Right vertical
-      TNL::Algorithms::ParallelFor2D<Device>::exec(dimensions.x() - 1, 0,
-                                                   dimensions.x(), dimensions.y(),
-                                                   outerOriented,
-                                                   *this,
-                                                   CoordinatesType(0, 1),
-                                                   CoordinatesType(1, 0),
-                                                   args...);
+      TNL::Algorithms::ParallelFor<Device>::exec(0,
+                                                 dimensions.y(),
+                                                 outerOriented,
+                                                 1, dimensions.x(),
+                                                 *this,
+                                                 CoordinatesType(0, 1),
+                                                 CoordinatesType(1, 0),
+                                                 args...);
       break;
    }
    case 2:
-      if (localBegin < interiorBegin && interiorEnd < localEnd) {
+      // Lower horizontal
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x(), outer, 0, 0, *this, args...);
+      // Upper horizontal
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.x(), outer, 0, dimensions.y() - 1, *this, args...);
+      // Left vertical
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.y(), outer, 1, 0, *this, args...);
+      // Right vertical
+      TNL::Algorithms::ParallelFor<Device>::exec(0, dimensions.y(), outer, 1, dimensions.x() - 1, *this, args...);
+      // TODO: - Verify with the distributed grid
+      /*if (localBegin < interiorBegin && interiorEnd < localEnd) {
          // Lower horizontal
          TNL::Algorithms::ParallelFor2D<Device>::exec(interiorBegin.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorBegin.y() + 1, outer, *this, args...);
          // Upper horizontal
@@ -623,8 +667,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
       // Left vertical
       TNL::Algorithms::ParallelFor2D<Device>::exec(localBegin.x(), interiorBegin.y(), interiorBegin.x(), interiorEnd.y(), outer, *this, args...);
       // Right vertical
-      TNL::Algorithms::ParallelFor2D<Device>::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...);
-
+      TNL::Algorithms::ParallelFor2D<Device>::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...);*/
       break;
    default:
       break;
diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
index d91b5f449..922673de5 100644
--- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
@@ -454,11 +454,11 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
 GridEntity( const GridType& grid )
  : grid( grid ),
    entityIndex( -1 ),
-   coordinates( 0 ),
-   neighborEntitiesStorage( *this )
+   coordinates( 0 )
 {
 }
 
+// Basis is Discarded
 template< int Dimension,
           typename Real,
           typename Device,
-- 
GitLab


From 7cf225c3728d37cdbc1563cadcd52d65ec3e510f Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 25 Nov 2021 23:23:41 +0100
Subject: [PATCH 23/29] Implement HeatMap for TNL grid

---
 .../HeatEquationGrid/HeatmapTNLGrid/main.h    | 109 +++++++++++++++++-
 1 file changed, 106 insertions(+), 3 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
index 45154f05c..82b27213d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
@@ -1,8 +1,110 @@
 
 #pragma once
 
-#include "TNL/Meshes/GridDetails/Grid1D.h"
+#include "TNL/Meshes/GridDetails/Grid2D.h"
+#include "../Base/HeatmapSolver.h"
 
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters& params) const {
+   using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>;
+
+   Grid2D grid;
+
+   // Grid implementation defines its dimensions in the amount of edges.
+   // To align it size to all other benchmarks substract 1
+   grid.setDimensions(params.xSize - 1, params.ySize - 1);
+
+   // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well
+   int verticesCount = grid.template getEntitiesCount<0>();
+
+   TNL::Containers::Array<Real, Device> ux(verticesCount), // data at step u
+                                        aux(verticesCount);// data at step u + 1
+
+   // Invalidate ux/aux
+   ux = 0;
+   aux = 0;
+
+   const Real hx = params.xDomainSize / (Real)params.xSize;
+   const Real hy = params.yDomainSize / (Real)params.ySize;
+   const Real hx_inv = 1 / (hx * hx);
+   const Real hy_inv = 1 / (hy * hy);
+
+   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+
+   auto uxView = ux.getView(),
+        auxView = aux.getView();
+
+   auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable {
+      auto index = entity.getIndex();
+
+      auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2;
+      auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2;
+
+      uxView[index] = exp(params.sigma * (x * x + y * y));
+   };
+
+   grid.template forInterior<0>(init);
+
+   if (!writeGNUPlot("data.txt", params, ux))
+      return false;
+
+   auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
+      auto index = entity.getIndex();
+      auto width = grid.getDimensions().x() + 1;
+
+      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                        (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv;
+   };
+
+   auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
+      auto index = entity.getIndex();
+
+      uxView[index] += auxView[index] * timestep;
+   };
+
+   Real start = 0;
+
+   while (start < params.finalTime) {
+      grid.template forInterior<0>(next);
+      grid.template forInterior<0>(update);
+
+      start += timestep;
+   }
+
+   return writeGNUPlot("data_final.txt", params, ux);
+
+   return false;
+}
+
+int main(int argc, char* argv[]) {
+   using Real = double;
+
+   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
+
+   TNL::Config::ParameterContainer parameters;
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
+
+   auto device = parameters.getParameter<TNL::String>("device");
+   auto params = HeatmapSolver<Real>::Parameters(parameters);
+
+   HeatmapSolver<Real> solver;
+
+   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+      return EXIT_FAILURE;
+
+#ifdef HAVE_CUDA
+   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+      return EXIT_FAILURE;
+#endif
+
+   return EXIT_SUCCESS;
+}
+
+
+// TODO: - Move to tests
+/*
 using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>;
 using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>;
 
@@ -45,6 +147,8 @@ void check2D() {
       printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
    };
 
+   std::cout << "Entities count of 0: dimension" << grid.getEntitiesCount<0>() << std::endl;
+
    grid.forAll<0>(fn0, 0);
    grid.forInterior<0>(fn0, 1);
    grid.forBoundary<0>(fn0, 2);
@@ -59,7 +163,6 @@ void check2D() {
 }
 
 int main(int argc, char *argv[]) {
-   check2D();
    return 0;
 }
-
+*/
-- 
GitLab


From a36658056e448483f1ac591e099b646c03bef66b Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Tue, 30 Nov 2021 12:57:34 +0100
Subject: [PATCH 24/29] WIP: Work on Benchmarks for HeatMap grid

---
 .../HeatEquationGrid/Base/HeatmapSolver.h     |  21 +-
 .../Base/HeatmapSolverBenchmark.h             | 172 +++++++
 .../HeatmapNDimGrid/CMakeLists.txt            |   4 +-
 .../HeatmapNDimGrid/implementation.h          | 420 ++++++++++++++++++
 .../HeatEquationGrid/HeatmapNDimGrid/main.h   | 413 +----------------
 5 files changed, 612 insertions(+), 418 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h

diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
index 92a2b4611..6f3411268 100644
--- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
@@ -21,16 +21,27 @@ public:
       const Real xDomainSize, yDomainSize;
       const Real sigma;
       const Real timeStep, finalTime;
+      const bool outputData;
       const bool verbose;
 
       Parameters(const TNL::Config::ParameterContainer &parameters);
+      Parameters(const int xSize, const int ySize,
+                 const Real xDomainSize, const Real yDomainSize,
+                 const Real sigma,
+                 const Real timeStep, const Real finalTime,
+                 const bool outputData,
+                 const bool verbose):
+                 xSize(xSize), ySize(ySize),
+                 xDomainSize(xDomainSize), yDomainSize(yDomainSize),
+                 sigma(sigma),
+                 timeStep(timeStep), finalTime(finalTime),
+                 outputData(outputData), verbose(verbose) {}
 
       static TNL::Config::ConfigDescription makeInputConfig();
    };
 
    template <typename Device>
    bool solve(const Parameters &parameters) const;
-
 private:
    template <typename Device>
    bool writeGNUPlot(const std::string &filename,
@@ -49,7 +60,6 @@ TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig(
 #ifdef HAVE_CUDA
    config.addEntryEnum<TNL::String>("cuda");
 #endif
-
    config.addEntry<int>("grid-x-size", "Grid size along x-axis.", 100);
    config.addEntry<int>("grid-y-size", "Grid size along y-axis.", 100);
 
@@ -73,14 +83,17 @@ HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContaine
                                                                                                  sigma(parameters.getParameter<Real>("sigma")),
                                                                                                  timeStep(parameters.getParameter<Real>("time-step")),
                                                                                                  finalTime(parameters.getParameter<Real>("final-time")),
+                                                                                                 outputData(parameters.getParameter<bool>("outputData"))
                                                                                                  verbose(parameters.getParameter<bool>("verbose")) {}
 
 template <typename Real>
 template <typename Device>
 bool HeatmapSolver<Real>::writeGNUPlot(const std::string &filename,
                                        const HeatmapSolver<Real>::Parameters &params,
-                                       const TNL::Containers::Array<Real, Device> &map) const
-{
+                                       const TNL::Containers::Array<Real, Device> &map) const {
+   if (!params.outputData)
+      return true;
+
    std::ofstream out(filename, std::ios::out);
 
    if (!out.is_open())
diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
new file mode 100644
index 000000000..2d7fd0504
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
@@ -0,0 +1,172 @@
+
+#include <TNL/Config/parseCommandLine.h>
+
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+
+#include <TNL/Benchmarks/Benchmarks.h>
+
+#include "HeatmapSolver.h"
+
+class HeatmapSolverBenchmark {
+   public:
+      template<typename Real, typename Device>
+      void exec(const HeatmapSolver::Parameters& params) const;
+
+      template< typename Real >
+      void runBenchmark(const Benchmark& benchmark,
+                        const Benchmark::MetadataMap,
+                        const int minXDimension,
+                        const int maxXDimension,
+                        const int xStepSizeFactor,
+                        const int minYDimension,
+                        const int maxYDimension,
+                        const int yStepSizeFactor,
+                        const TNL::Config::ParameterContainer& parameters) const;
+
+      static TNL::Config::ConfigDescription makeInputConfig();
+};
+
+TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() {
+   TNL::Config::ConfigDescription config;
+
+   config.addDelimiter("Benchmark settings:");
+   config.addEntry<TNL::String>("log-file", "Log file name.", "tnl-benchmark-heatmap.log");
+   config.addEntry<TNL::String>("output-mode", "Mode for opening the log file.", "overwrite");
+   config.addEntryEnum("append");
+   config.addEntryEnum("overwrite");
+
+   config.addEntry<TNL::String>("precision", "Precision of the arithmetics.", "double");
+   config.addEntryEnum("float");
+   config.addEntryEnum("double");
+   config.addEntryEnum("all");
+
+   config.addEntry<int>("min-x-dimension", "Minimum dimension over x axis used in the benchmark.", 100);
+   config.addEntry<int>("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 10000);
+   config.addEntry<int>("x-size-step-factor", "Factor determining the dimension grows over x axis. First size is min-x-dimension and each following size is stepFactor*previousSize, up to max-x-dimension.", 2);
+
+   config.addEntry<int>("min-y-dimension", "Minimum dimension over x axis used in the benchmark.", 100);
+   config.addEntry<int>("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 10000);
+   config.addEntry<int>("y-size-step-factor", "Factor determining the dimension grows over y axis. First size is min-y-dimension and each following size is stepFactor*previousSize, up to max-y-dimension.", 2);
+
+   config.addEntry<int>("loops", "Number of iterations for every computation.", 10);
+
+   config.addEntry<int>("verbose", "Verbose mode.", 1);
+
+   config.addDelimiter("Problem settings:");
+   config.addEntry<double>("domain-x-size", "Domain size along x-axis.", 2.0);
+   config.addEntry<double>("domain-y-size", "Domain size along y-axis.", 2.0);
+
+   config.addEntry<double>("sigma", "Sigma in exponential initial condition.", 1.0);
+
+   config.addEntry<double>("time-step", "Time step. By default it is proportional to one over space step square.", 0.0);
+   config.addEntry<double>("final-time", "Final time of the simulation.", 0.012);
+
+   config.addDelimiter("Device settings:");
+   TNL::Devices::Host::configSetup( config );
+   TNL::Devices::Cuda::configSetup( config );
+}
+
+template<typename Real,
+         template Device>
+void HeatmapSolverBenchmark::runBenchmark(Benchmark& benchmark,
+                                          Benchmark::MetadataMap metadata,
+                                          const int minXDimension,
+                                          const int maxXDimension,
+                                          const int xStepSizeFactor,
+                                          const int minYDimension,
+                                          const int maxYDimension,
+                                          const int yStepSizeFactor,
+                                          const TNL::Config::ParameterContainer& parameters) const {
+   const TNL::String precision = getType< Real >();
+   metadata["precision"] = precision;
+
+   benchmark.newBenchmark(String("Heatmap grid with (") + precision + ", host allocator" + Device + ")");
+
+   auto xDomainSize = parameters.getParameter<Real>("domain-x-size");
+   auto yDomainSize = parameters.getParameter<Real>("domain-y-size");
+
+   auto sigma = parameters.getParameter<Real>("sigma");
+
+   auto timeStep = parameters.getParameter<Real>("time-step");
+
+   auto finalTime = parameters.getParameter<Real>("final-time");
+
+
+   for(std::size_t xSize = minXDimension; xSize <= maxXDimension; size *= xStepSizeFactor) {
+      for(std::size_t ySize = minXDimension; ySize <= maxXDimension; size *= yStepSizeFactor) {
+         benchmark.setMetadataColumns( Benchmark::MetadataColumns({
+            { "xSize", convertToString(xSize) },
+            { "ySize", convertToString(ySize) }
+         }));
+
+         auto lambda = [=]() {
+            HeatmapSolver::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false);
+
+            exec(params);
+         };
+
+         benchmark.time<Device>()
+      }
+   }
+}
+
+int main(int argc, char* argv[]) {
+   HeatmapSolverBenchmark solver;
+
+   auto config = HeatmapSolverBenchmark::makeInputConfig();
+
+   TNL::Config::ParameterContainer parameters;
+
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
+
+   if (!TNL::Devices::Host::setup( parameters ) || !TNL::Devices::Cuda::setup( parameters ) )
+      return EXIT_FAILURE;
+
+   const TNL::String logFileName = parameters.getParameter<TNL::String>( "log-file" );
+   const TNL::String outputMode = parameters.getParameter<TNL::String>( "output-mode" );
+   const TNL::String precision = parameters.getParameter<TNL::String>( "precision" );
+
+   const std::size_t minXDimension = parameters.getParameter<int>("min-x-dimension");
+   const std::size_t maxXDimension = parameters.getParameter<int>("max-x-dimension");
+   const std::size_t xSizeStepFactor = parameters.getParameter<int>("x-size-step-factor");
+
+   if(xSizeStepFactor <= 1) {
+      std::cerr << "The value of --x-size-step-factor must be greater than 1." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   const std::size_t minYDimension = parameters.getParameter<int>("min-y-dimension");
+   const std::size_t maxYDimension = parameters.getParameter<int>("max-y-dimension");
+   const std::size_t ySizeStepFactor = parameters.getParameter<int>("y-size-step-factor");
+
+   const int loops = parameters.getParameter< int >("loops");
+   const int verbose = parameters.getParameter< int >("verbose");
+
+   if(ySizeStepFactor <= 1) {
+      std::cerr << "The value of --y-size-step-factor must be greater than 1." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   auto mode = std::ios::out;
+
+   if( outputMode == "append" )
+      mode |= std::ios::app;
+
+   std::ofstream logFile( logFileName.getString(), mode );
+
+   TNL::Benchmarks::Benchmark benchmark(loops, verbose);
+
+   if( precision == "all" || precision == "float" )
+      solver.runBenchmark<float>(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters);
+   if( precision == "all" || precision == "double" )
+      solver.runBenchmark<double>(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters);
+
+   if(!benchmark.save(logFile)) {
+      std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   return EXIT_SUCCESS;
+}
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
index a17375117..d493b57b4 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cu )
+   CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cu )
 ELSE()
-   add_executable( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cpp )
+   add_executable( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
new file mode 100644
index 000000000..ed8ea004d
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
@@ -0,0 +1,420 @@
+
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <type_traits>
+#include <array>
+#include <bitset>
+
+#include "../Base/HeatmapSolver.h"
+
+#include <TNL/Config/parseCommandLine.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Containers/Array.h>
+#include <TNL/Containers/StaticArray.h>
+#include <TNL/Algorithms/ParallelFor.h>
+
+#pragma once
+
+template<bool ...> struct bool_pack {};
+
+template <bool... Bs>
+using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>;
+
+template<int Dimension,
+         typename Index,
+         typename = std::enable_if_t<(Dimension > 0)>,
+         typename = std::enable_if_t<std::is_integral<Index>::value>>
+using Container = TNL::Containers::StaticArray<Dimension, Index>;
+
+template<int Dimension,
+         typename Index,
+         typename = std::enable_if_t<(Dimension > 0)>,
+         typename = std::enable_if_t<std::is_integral<Index>::value>>
+class GridEntity {
+   public:
+      __cuda_callable__
+      inline explicit GridEntity(const Index& vertexIndex,
+                                 const Container<Dimension, Index>& coordinates,
+                                 const Container<Dimension, bool>& direction) :
+                                 vertexIndex(vertexIndex),
+                                 coordinates(coordinates),
+                                 direction(direction) {};
+
+      __cuda_callable__
+      ~GridEntity() {};
+
+      __cuda_callable__
+      inline Container<Dimension, Index> getCoordinates() const noexcept {
+         return coordinates;
+      }
+
+      __cuda_callable__
+      inline Index getIndex() const noexcept {
+         return vertexIndex;
+      }
+   private:
+      const Index vertexIndex;
+      const Container<Dimension, Index> coordinates;
+      const Container<Dimension, bool> direction;
+};
+
+template<int Dimension,
+         typename Index,
+         typename Device = TNL::Devices::Host,
+         typename = std::enable_if_t<(Dimension > 0)>>
+class Grid {
+   public:
+      Grid() {}
+      ~Grid() {}
+
+      /**
+       *  @brief - Specifies dimensions of the grid
+       *  @param[in] dimensions - A parameter pack, which specifies points count in the specific dimension.
+       *                          Most significant dimension is in the beginning of the list.
+       *                          Least significant dimension is in the end of the list
+       */
+      template <typename... Dimensions,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, Dimensions>::value...>::value>,
+                typename = std::enable_if_t<sizeof...(Dimensions) == Dimension>>
+      void setDimensions(Dimensions... dimensions) noexcept {
+         Index i = 0;
+
+         for (auto x: { dimensions... }) {
+            TNL_ASSERT_GT(x, 0, "Dimension must be positive");
+            this -> dimensions[i] = x;
+            i++;
+         }
+
+         refreshDimensionMaps();
+      }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getDimension(Index index) const noexcept {
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
+
+         return dimensions[index];
+      }
+      /**
+       * @param[in] indices - A dimension index pack
+       */
+      template <typename... DimensionIndex,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+      Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{indices...};
+
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this -> getDimension(result[i]);
+
+         return result;
+      }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept {
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
+
+         return cumulativeDimensionMap(index);
+      }
+      /**
+       * @brief - Returns the number of entities of specific dimension
+       */
+      template <typename... DimensionIndex,
+                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+      Container<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{indices...};
+
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this -> getEntitiesCount(result[i]);
+
+         return result;
+      }
+      /**
+       * @param[in] index - index of dimension
+       */
+      inline __cuda_callable__ Index getEndIndex(Index index) const noexcept {
+         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
+         TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension");
+
+         return this -> getDimension(index) - 1;
+      }
+       /**
+       * @brief - Returns the last index of specific dimensions
+       */
+      template <typename... DimensionIndex,
+         typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
+         typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
+         Container<sizeof...(DimensionIndex), Index> getEndIndices(DimensionIndex... indices) const noexcept {
+         Container<sizeof...(DimensionIndex), Index> result{ indices... };
+
+         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
+            result[i] = this->getEndIndex(result[i]);
+
+         return result;
+      }
+      /**
+       * @brief - Traversers all elements in the grid
+       */
+      template <typename Function, typename... FunctionArgs>
+      void traverseAll(Function function, FunctionArgs... args) const noexcept {
+         auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable {
+            function(index, args...);
+         };
+
+         TNL::Algorithms::ParallelFor<Device>::exec(0, cumulativeDimensionMap[0], lambda, args...);
+      }
+      /**
+       * @brief - Traverses a grid from start index to end index.
+       *
+       * @param[in] start - a start index of point
+       * @param[in] end - an end index of point
+       * @param[in] directions - A pack of boolean vector flags with the size of the dimension.
+       *   For example, let's have the 3-dimensional grid.
+       *     A pack {false, false, false} will call function for all points
+       *     A pack {true, false, false} will call function for edges directed over dimension at index of true
+       *     A pack {true, true, false} will call function for faces directed over dimension at index of true
+       *     A pack {true, true, true} will call function for cells directed over
+       *
+       */
+      template<typename Function, typename... FunctionArgs>
+      void traverse(const Container<Dimension, Index>& firstVertex,
+                    const Container<Dimension, Index>& secondVertex,
+                    const TNL::Containers::Array<Container<Dimension, bool>, Device>& directions,
+                    Function function,
+                    FunctionArgs... args) const noexcept {
+         Index verticesCount = 1;
+         Container<Dimension, Index> traverseRectOrigin, traverseRectDimensions,
+                                     dimensionsProducts, traverseRectDimensionsProducts;
+
+         for (Index i = 0; i < Dimension; i++) {
+            traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1;
+            verticesCount *= traverseRectDimensions[i];
+            traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]);
+
+            TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range");
+            TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range");
+         }
+
+         dimensionsProducts = getDimensionProducts(dimensions);
+         traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions);
+
+         auto outerFunction = [=] __cuda_callable__(Index offset,
+                                                    const Container<Dimension, Index>& traverseRectOrigin,
+                                                    const Container<Dimension, Index>& traverseRectDimensions,
+                                                    const Container<Dimension, Index>& traverseRectDimensionsProducts,
+                                                    const Container<Dimension, Index>& dimensionsProducts,
+                                                    FunctionArgs... args) mutable {
+            auto entity = this -> makeEntitity(offset,
+                                               traverseRectOrigin, traverseRectDimensions,
+                                               traverseRectDimensionsProducts, dimensionsProducts);
+
+            function(entity, args...);
+         };
+
+         Index lowerBound = 0, upperBound = verticesCount;
+
+         for (Index i = 0; i < directions.getSize(); i++) {
+            TNL::Algorithms::ParallelFor<Device>::exec(lowerBound, upperBound, outerFunction,
+                                                       traverseRectOrigin,
+                                                       traverseRectDimensions,
+                                                       traverseRectDimensionsProducts,
+                                                       dimensionsProducts,
+                                                       args...);
+         }
+      }
+   private:
+      Container<Dimension, Index> dimensions;
+      /**
+       * @brief - A dimension map is a store for dimension limits over all combinations of basis.
+       *          First, (n choose 0) elements will contain the count of 0 dimension elements
+       *          Second, (n choose 1) elements will contain the count of 1-dimension elements
+       *          ....
+       *
+       *          For example, let's have a 3-d grid, then the map indexing will be the next:
+       *            0 - 0 - count of vertices
+       *            1, 2, 3 - count of edges in z, y, x plane
+       *            4, 5, 6 - count of faces in yz, xz, xy plane
+       *            7 - count of cells in z y x plane
+       *
+       * @warning - The ordering of is lexigraphical.
+       */
+      Container<1 << Dimension, Index> dimensionMap;
+      /**
+       * @brief - A cumulative map over dimensions.
+       */
+      Container<Dimension + 1, Index> cumulativeDimensionMap;
+      /**
+       * @brief - Fills dimensions map for N-dimensional Grid.
+       *
+       * @complexity - O(2 ^ Dimension)
+       */
+      void refreshDimensionMaps() noexcept {
+         std::array<bool, Dimension> combinationBuffer = {};
+         std::size_t j = 0;
+
+         for (std::size_t i = 0; i < Dimension + 1; i++)
+            cumulativeDimensionMap[i] = 0;
+
+         for (std::size_t i = 0; i <= Dimension; i++) {
+            std::fill(combinationBuffer.begin(), combinationBuffer.end(), false);
+            std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true);
+
+            do {
+               int result = 1;
+
+               for (std::size_t k = 0; k < combinationBuffer.size(); k++)
+                  result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k];
+
+               dimensionMap[j] = result;
+               cumulativeDimensionMap[i] += result;
+
+               j++;
+            } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end()));
+         }
+      }
+
+      __cuda_callable__ inline
+      GridEntity<Dimension, Index> makeEntitity(const Index index,
+                                                const Container<Dimension, Index>& traverseRectOrigin,
+                                                const Container<Dimension, Index>& traverseRectDimensions,
+                                                const Container<Dimension, Index>& traverseRectDimensionsProducts,
+                                                const Container<Dimension, Index>& dimensionsProducts) const {
+         const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions);
+         Container<Dimension, Index> globalCoordinates = 0;
+
+         for (Index i = 0; i < Dimension; i++)
+            globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i];
+
+         const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts);
+
+         return GridEntity<Dimension, Index>(globalIndex, globalCoordinates, {});
+      }
+      /**
+       * Calculates position in the specific boundaries
+       */
+      __cuda_callable__ inline
+      Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const {
+         Container<Dimension, Index> coordinates = 0;
+         Index tmpIndex = index;
+
+         Index dim = Dimension - 1;
+
+         // TODO: -  Implement overflow check.
+         while (tmpIndex) {
+            Index dimension = dimensions[dim],
+                  quotient = tmpIndex / dimension,
+                  reminder = tmpIndex - (dimension * quotient);
+
+            coordinates[dim] = reminder;
+            tmpIndex = quotient;
+
+            dim -= 1;
+         }
+
+         return coordinates;
+      }
+      /**
+       * Calculates product matrix based on the dimension
+       */
+      __cuda_callable__ inline
+      Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept {
+         Container<Dimension, Index> products = 0;
+
+         for (Index i = Dimension; i > 0; i--)
+            products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i];
+
+         return products;
+      }
+      /**
+       * Calculates index based on the dimension
+       */
+      __cuda_callable__ inline
+      Index getIndex(const Container<Dimension, Index>& coordinates,
+                     const Container<Dimension, Index>& dimensionProducts) const {
+         Index index = 0;
+
+         for (Index i = 0; i < Dimension; i++)
+            index += coordinates[i] * dimensionProducts[i];
+
+         return index;
+      }
+
+};
+
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const {
+   Grid<2, int, Device> grid;
+
+   grid.setDimensions(params.xSize, params.ySize);
+
+   const Real hx = params.xDomainSize / (Real)grid.getDimension(0);
+   const Real hy = params.yDomainSize / (Real)grid.getDimension(1);
+   const Real hx_inv = 1 / (hx * hx);
+   const Real hy_inv = 1 / (hy * hy);
+
+   auto entitiesCount = grid.getEntitiesCount(0);
+   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+
+   TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
+                                        aux(entitiesCount);// data at step u + 1
+
+   auto uxView = ux.getView(), auxView = aux.getView();
+
+   // Invalidate ux/aux
+   ux = 0;
+   aux = 0;
+
+   auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable {
+      auto position = entity.getCoordinates();
+      auto index = entity.getIndex();
+
+      auto x = position[0] * hx - params.xDomainSize / 2;
+      auto y = position[1] * hx - params.yDomainSize / 2;
+
+      uxView[index] = exp(params.sigma * (x * x + y * y));
+   };
+
+   const Container<2, bool> direction{ false, false };
+
+   grid.traverse({ 1, 1 },
+                 { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
+                 { direction },
+                 init);
+
+   if (!writeGNUPlot("data.txt", params, ux))
+      return false;
+
+   auto xDimension = grid.getDimension(0);
+
+   auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable {
+      auto index = entity.getIndex();
+
+      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                       (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv;
+   };
+
+   auto update = [=] __cuda_callable__(int i) mutable {
+      uxView[i] += auxView[i] * timestep;
+   };
+
+   Real start = 0;
+
+   while (start < params.finalTime) {
+      grid.traverse({ 1, 1 },
+                    { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
+                    { direction },
+                    next);
+
+      grid.traverseAll(update);
+
+      start += timestep;
+   }
+
+   return writeGNUPlot("data_final.txt", params, ux);
+};
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
index fbd536f8f..e6671e2e5 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
@@ -7,418 +7,7 @@
 #include <bitset>
 
 #include "../Base/HeatmapSolver.h"
-
-#include <TNL/Config/parseCommandLine.h>
-#include <TNL/Devices/Host.h>
-#include <TNL/Containers/Array.h>
-#include <TNL/Containers/StaticArray.h>
-#include <TNL/Algorithms/ParallelFor.h>
-
-#pragma once
-
-template<bool ...> struct bool_pack {};
-
-template <bool... Bs>
-using conjunction = std::is_same<bool_pack<true, Bs...>, bool_pack<Bs..., true>>;
-
-template<int Dimension,
-         typename Index,
-         typename = std::enable_if_t<(Dimension > 0)>,
-         typename = std::enable_if_t<std::is_integral<Index>::value>>
-using Container = TNL::Containers::StaticArray<Dimension, Index>;
-
-template<int Dimension,
-         typename Index,
-         typename = std::enable_if_t<(Dimension > 0)>,
-         typename = std::enable_if_t<std::is_integral<Index>::value>>
-class GridEntity {
-   public:
-      __cuda_callable__
-      inline explicit GridEntity(const Index& vertexIndex,
-                                 const Container<Dimension, Index>& coordinates,
-                                 const Container<Dimension, bool>& direction) :
-                                 vertexIndex(vertexIndex),
-                                 coordinates(coordinates),
-                                 direction(direction) {};
-
-      __cuda_callable__
-      ~GridEntity() {};
-
-      __cuda_callable__
-      inline Container<Dimension, Index> getCoordinates() const noexcept {
-         return coordinates;
-      }
-
-      __cuda_callable__
-      inline Index getIndex() const noexcept {
-         return vertexIndex;
-      }
-   private:
-      const Index vertexIndex;
-      const Container<Dimension, Index> coordinates;
-      const Container<Dimension, bool> direction;
-};
-
-template<int Dimension,
-         typename Index,
-         typename Device = TNL::Devices::Host,
-         typename = std::enable_if_t<(Dimension > 0)>>
-class Grid {
-   public:
-      Grid() {}
-      ~Grid() {}
-
-      /**
-       *  @brief - Specifies dimensions of the grid
-       *  @param[in] dimensions - A parameter pack, which specifies points count in the specific dimension.
-       *                          Most significant dimension is in the beginning of the list.
-       *                          Least significant dimension is in the end of the list
-       */
-      template <typename... Dimensions,
-                typename = std::enable_if_t<conjunction<std::is_same<Index, Dimensions>::value...>::value>,
-                typename = std::enable_if_t<sizeof...(Dimensions) == Dimension>>
-      void setDimensions(Dimensions... dimensions) noexcept {
-         Index i = 0;
-
-         for (auto x: { dimensions... }) {
-            TNL_ASSERT_GT(x, 0, "Dimension must be positive");
-            this -> dimensions[i] = x;
-            i++;
-         }
-
-         refreshDimensionMaps();
-      }
-      /**
-       * @param[in] index - index of dimension
-       */
-      inline __cuda_callable__ Index getDimension(Index index) const noexcept {
-         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
-         TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension");
-
-         return dimensions[index];
-      }
-      /**
-       * @param[in] indices - A dimension index pack
-       */
-      template <typename... DimensionIndex,
-                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
-                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      Container<sizeof...(DimensionIndex), Index> getDimensions(DimensionIndex... indices) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{indices...};
-
-         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
-            result[i] = this -> getDimension(result[i]);
-
-         return result;
-      }
-      /**
-       * @param[in] index - index of dimension
-       */
-      inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept {
-         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
-         TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension");
-
-         return cumulativeDimensionMap(index);
-      }
-      /**
-       * @brief - Returns the number of entities of specific dimension
-       */
-      template <typename... DimensionIndex,
-                typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
-                typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-      Container<sizeof...(DimensionIndex), Index> getEntitiesCounts(DimensionIndex... indices) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{indices...};
-
-         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
-            result[i] = this -> getEntitiesCount(result[i]);
-
-         return result;
-      }
-      /**
-       * @param[in] index - index of dimension
-       */
-      inline __cuda_callable__ Index getEndIndex(Index index) const noexcept {
-         TNL_ASSERT_GE(index, 0, "Index must be greater than zero");
-         TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension");
-
-         return this -> getDimension(index) - 1;
-      }
-       /**
-       * @brief - Returns the last index of specific dimensions
-       */
-      template <typename... DimensionIndex,
-         typename = std::enable_if_t<conjunction<std::is_same<Index, DimensionIndex>::value...>::value>,
-         typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>>
-         Container<sizeof...(DimensionIndex), Index> getEndIndices(DimensionIndex... indices) const noexcept {
-         Container<sizeof...(DimensionIndex), Index> result{ indices... };
-
-         for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++)
-            result[i] = this->getEndIndex(result[i]);
-
-         return result;
-      }
-      /**
-       * @brief - Traversers all elements in the grid
-       */
-      template <typename Function, typename... FunctionArgs>
-      void traverseAll(Function function, FunctionArgs... args) const noexcept {
-         auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable {
-            function(index, args...);
-         };
-
-         TNL::Algorithms::ParallelFor<Device>::exec(0, cumulativeDimensionMap[0], lambda, args...);
-      }
-      /**
-       * @brief - Traverses a grid from start index to end index.
-       *
-       * @param[in] start - a start index of point
-       * @param[in] end - an end index of point
-       * @param[in] directions - A pack of boolean vector flags with the size of the dimension.
-       *   For example, let's have the 3-dimensional grid.
-       *     A pack {false, false, false} will call function for all points
-       *     A pack {true, false, false} will call function for edges directed over dimension at index of true
-       *     A pack {true, true, false} will call function for faces directed over dimension at index of true
-       *     A pack {true, true, true} will call function for cells directed over
-       *
-       */
-      template<typename Function, typename... FunctionArgs>
-      void traverse(const Container<Dimension, Index>& firstVertex,
-                    const Container<Dimension, Index>& secondVertex,
-                    const TNL::Containers::Array<Container<Dimension, bool>, Device>& directions,
-                    Function function,
-                    FunctionArgs... args) const noexcept {
-         Index verticesCount = 1;
-         Container<Dimension, Index> traverseRectOrigin, traverseRectDimensions,
-                                     dimensionsProducts, traverseRectDimensionsProducts;
-
-         for (Index i = 0; i < Dimension; i++) {
-            traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1;
-            verticesCount *= traverseRectDimensions[i];
-            traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]);
-
-            TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range");
-            TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range");
-         }
-
-         dimensionsProducts = getDimensionProducts(dimensions);
-         traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions);
-
-         auto outerFunction = [=] __cuda_callable__(Index offset,
-                                                    const Container<Dimension, Index>& traverseRectOrigin,
-                                                    const Container<Dimension, Index>& traverseRectDimensions,
-                                                    const Container<Dimension, Index>& traverseRectDimensionsProducts,
-                                                    const Container<Dimension, Index>& dimensionsProducts,
-                                                    FunctionArgs... args) mutable {
-            auto entity = this -> makeEntitity(offset,
-                                               traverseRectOrigin, traverseRectDimensions,
-                                               traverseRectDimensionsProducts, dimensionsProducts);
-
-            function(entity, args...);
-         };
-
-         Index lowerBound = 0, upperBound = verticesCount;
-
-         for (Index i = 0; i < directions.getSize(); i++) {
-            TNL::Algorithms::ParallelFor<Device>::exec(lowerBound, upperBound, outerFunction,
-                                                       traverseRectOrigin,
-                                                       traverseRectDimensions,
-                                                       traverseRectDimensionsProducts,
-                                                       dimensionsProducts,
-                                                       args...);
-         }
-      }
-   private:
-      Container<Dimension, Index> dimensions;
-      /**
-       * @brief - A dimension map is a store for dimension limits over all combinations of basis.
-       *          First, (n choose 0) elements will contain the count of 0 dimension elements
-       *          Second, (n choose 1) elements will contain the count of 1-dimension elements
-       *          ....
-       *
-       *          For example, let's have a 3-d grid, then the map indexing will be the next:
-       *            0 - 0 - count of vertices
-       *            1, 2, 3 - count of edges in z, y, x plane
-       *            4, 5, 6 - count of faces in yz, xz, xy plane
-       *            7 - count of cells in z y x plane
-       *
-       * @warning - The ordering of is lexigraphical.
-       */
-      Container<1 << Dimension, Index> dimensionMap;
-      /**
-       * @brief - A cumulative map over dimensions.
-       */
-      Container<Dimension + 1, Index> cumulativeDimensionMap;
-      /**
-       * @brief - Fills dimensions map for N-dimensional Grid.
-       *
-       * @complexity - O(2 ^ Dimension)
-       */
-      void refreshDimensionMaps() noexcept {
-         std::array<bool, Dimension> combinationBuffer = {};
-         std::size_t j = 0;
-
-         for (std::size_t i = 0; i < Dimension + 1; i++)
-            cumulativeDimensionMap[i] = 0;
-
-         for (std::size_t i = 0; i <= Dimension; i++) {
-            std::fill(combinationBuffer.begin(), combinationBuffer.end(), false);
-            std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true);
-
-            do {
-               int result = 1;
-
-               for (std::size_t k = 0; k < combinationBuffer.size(); k++)
-                  result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k];
-
-               dimensionMap[j] = result;
-               cumulativeDimensionMap[i] += result;
-
-               j++;
-            } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end()));
-         }
-      }
-
-      __cuda_callable__ inline
-      GridEntity<Dimension, Index> makeEntitity(const Index index,
-                                                const Container<Dimension, Index>& traverseRectOrigin,
-                                                const Container<Dimension, Index>& traverseRectDimensions,
-                                                const Container<Dimension, Index>& traverseRectDimensionsProducts,
-                                                const Container<Dimension, Index>& dimensionsProducts) const {
-         const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions);
-         Container<Dimension, Index> globalCoordinates = 0;
-
-         for (Index i = 0; i < Dimension; i++)
-            globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i];
-
-         const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts);
-
-         return GridEntity<Dimension, Index>(globalIndex, globalCoordinates, {});
-      }
-      /**
-       * Calculates position in the specific boundaries
-       */
-      __cuda_callable__ inline
-      Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const {
-         Container<Dimension, Index> coordinates = 0;
-         Index tmpIndex = index;
-
-         Index dim = Dimension - 1;
-
-         // TODO: -  Implement overflow check.
-         while (tmpIndex) {
-            Index dimension = dimensions[dim],
-                  quotient = tmpIndex / dimension,
-                  reminder = tmpIndex - (dimension * quotient);
-
-            coordinates[dim] = reminder;
-            tmpIndex = quotient;
-
-            dim -= 1;
-         }
-
-         return coordinates;
-      }
-      /**
-       * Calculates product matrix based on the dimension
-       */
-      __cuda_callable__ inline
-      Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept {
-         Container<Dimension, Index> products = 0;
-
-         for (Index i = Dimension; i > 0; i--)
-            products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i];
-
-         return products;
-      }
-      /**
-       * Calculates index based on the dimension
-       */
-      __cuda_callable__ inline
-      Index getIndex(const Container<Dimension, Index>& coordinates,
-                     const Container<Dimension, Index>& dimensionProducts) const {
-         Index index = 0;
-
-         for (Index i = 0; i < Dimension; i++)
-            index += coordinates[i] * dimensionProducts[i];
-
-         return index;
-      }
-
-};
-
-template <typename Real>
-template <typename Device>
-bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) const {
-   Grid<2, int, Device> grid;
-
-   grid.setDimensions(params.xSize, params.ySize);
-
-   const Real hx = params.xDomainSize / (Real)grid.getDimension(0);
-   const Real hy = params.yDomainSize / (Real)grid.getDimension(1);
-   const Real hx_inv = 1 / (hx * hx);
-   const Real hy_inv = 1 / (hy * hy);
-
-   auto entitiesCount = grid.getEntitiesCount(0);
-   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
-
-   TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
-                                        aux(entitiesCount);// data at step u + 1
-
-   auto uxView = ux.getView(), auxView = aux.getView();
-
-   // Invalidate ux/aux
-   ux = 0;
-   aux = 0;
-
-   auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable {
-      auto position = entity.getCoordinates();
-      auto index = entity.getIndex();
-
-      auto x = position[0] * hx - params.xDomainSize / 2;
-      auto y = position[1] * hx - params.yDomainSize / 2;
-
-      uxView[index] = exp(params.sigma * (x * x + y * y));
-   };
-
-   const Container<2, bool> direction{ false, false };
-
-   grid.traverse({ 1, 1 },
-                 { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
-                 { direction },
-                 init);
-
-   if (!writeGNUPlot("data.txt", params, ux))
-      return false;
-
-   auto xDimension = grid.getDimension(0);
-
-   auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable {
-      auto index = entity.getIndex();
-
-      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                       (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv;
-   };
-
-   auto update = [=] __cuda_callable__(int i) mutable {
-      uxView[i] += auxView[i] * timestep;
-   };
-
-   Real start = 0;
-
-   while (start < params.finalTime) {
-      grid.traverse({ 1, 1 },
-                    { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 },
-                    { direction },
-                    next);
-
-      grid.traverseAll(update);
-
-      start += timestep;
-   }
-
-   return writeGNUPlot("data_final.txt", params, ux);
-};
-
+#include "implementation.h"
 
 int main(int argc, char* argv[]) {
    using Real = double;
-- 
GitLab


From 9a1e341479f35040ab7863f46494c2bd21772447 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Tue, 30 Nov 2021 15:17:35 +0100
Subject: [PATCH 25/29] Implement benchmark for NDim grid

---
 .../HeatEquationGrid/Base/HeatmapSolver.h     |   2 +-
 .../Base/HeatmapSolverBenchmark.h             | 115 ++++++++++--------
 .../HeatmapNDimGrid/CMakeLists.txt            |   7 +-
 .../HeatmapNDimGrid/benchmark/CMakeLists.txt  |   5 +
 .../HeatmapNDimGrid/{ => benchmark}/main.cpp  |   0
 .../HeatmapNDimGrid/{ => benchmark}/main.cu   |   0
 .../HeatmapNDimGrid/benchmark/main.h          |   6 +
 .../HeatmapNDimGrid/solution/CMakeLists.txt   |   5 +
 .../HeatmapNDimGrid/solution/main.cpp         |   2 +
 .../HeatmapNDimGrid/solution/main.cu          |   2 +
 .../HeatmapNDimGrid/{ => solution}/main.h     |  26 +---
 11 files changed, 92 insertions(+), 78 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
 rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => benchmark}/main.cpp (100%)
 rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => benchmark}/main.cu (100%)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu
 rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => solution}/main.h (62%)

diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
index 6f3411268..6974e06bc 100644
--- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
@@ -83,7 +83,7 @@ HeatmapSolver<Real>::Parameters::Parameters(const TNL::Config::ParameterContaine
                                                                                                  sigma(parameters.getParameter<Real>("sigma")),
                                                                                                  timeStep(parameters.getParameter<Real>("time-step")),
                                                                                                  finalTime(parameters.getParameter<Real>("final-time")),
-                                                                                                 outputData(parameters.getParameter<bool>("outputData"))
+                                                                                                 outputData(parameters.getParameter<bool>("outputData")),
                                                                                                  verbose(parameters.getParameter<bool>("verbose")) {}
 
 template <typename Real>
diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
index 2d7fd0504..6e9539f2a 100644
--- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
@@ -8,20 +8,21 @@
 
 #include "HeatmapSolver.h"
 
+#pragma once
+
 class HeatmapSolverBenchmark {
    public:
       template<typename Real, typename Device>
-      void exec(const HeatmapSolver::Parameters& params) const;
-
-      template< typename Real >
-      void runBenchmark(const Benchmark& benchmark,
-                        const Benchmark::MetadataMap,
-                        const int minXDimension,
-                        const int maxXDimension,
-                        const int xStepSizeFactor,
-                        const int minYDimension,
-                        const int maxYDimension,
-                        const int yStepSizeFactor,
+      void exec(const typename HeatmapSolver<Real>::Parameters& params) const;
+
+      template<typename Real, typename Device>
+      void runBenchmark(TNL::Benchmarks::Benchmark<> & benchmark,
+                        const std::size_t minXDimension,
+                        const std::size_t maxXDimension,
+                        const std::size_t xStepSizeFactor,
+                        const std::size_t minYDimension,
+                        const std::size_t maxYDimension,
+                        const std::size_t yStepSizeFactor,
                         const TNL::Config::ParameterContainer& parameters) const;
 
       static TNL::Config::ConfigDescription makeInputConfig();
@@ -42,11 +43,11 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() {
    config.addEntryEnum("all");
 
    config.addEntry<int>("min-x-dimension", "Minimum dimension over x axis used in the benchmark.", 100);
-   config.addEntry<int>("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 10000);
+   config.addEntry<int>("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 200);
    config.addEntry<int>("x-size-step-factor", "Factor determining the dimension grows over x axis. First size is min-x-dimension and each following size is stepFactor*previousSize, up to max-x-dimension.", 2);
 
    config.addEntry<int>("min-y-dimension", "Minimum dimension over x axis used in the benchmark.", 100);
-   config.addEntry<int>("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 10000);
+   config.addEntry<int>("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 200);
    config.addEntry<int>("y-size-step-factor", "Factor determining the dimension grows over y axis. First size is min-y-dimension and each following size is stepFactor*previousSize, up to max-y-dimension.", 2);
 
    config.addEntry<int>("loops", "Number of iterations for every computation.", 10);
@@ -65,48 +66,54 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() {
    config.addDelimiter("Device settings:");
    TNL::Devices::Host::configSetup( config );
    TNL::Devices::Cuda::configSetup( config );
-}
-
-template<typename Real,
-         template Device>
-void HeatmapSolverBenchmark::runBenchmark(Benchmark& benchmark,
-                                          Benchmark::MetadataMap metadata,
-                                          const int minXDimension,
-                                          const int maxXDimension,
-                                          const int xStepSizeFactor,
-                                          const int minYDimension,
-                                          const int maxYDimension,
-                                          const int yStepSizeFactor,
-                                          const TNL::Config::ParameterContainer& parameters) const {
-   const TNL::String precision = getType< Real >();
-   metadata["precision"] = precision;
-
-   benchmark.newBenchmark(String("Heatmap grid with (") + precision + ", host allocator" + Device + ")");
-
-   auto xDomainSize = parameters.getParameter<Real>("domain-x-size");
-   auto yDomainSize = parameters.getParameter<Real>("domain-y-size");
 
-   auto sigma = parameters.getParameter<Real>("sigma");
+   return config;
+}
 
-   auto timeStep = parameters.getParameter<Real>("time-step");
+template<typename Real, typename Device>
+void HeatmapSolverBenchmark::exec(const typename HeatmapSolver<Real>::Parameters& params) const {
+   HeatmapSolver<Real> solver;
 
-   auto finalTime = parameters.getParameter<Real>("final-time");
+   auto result = solver.template solve<Device>(params);
 
+   if (!result)
+      std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl;
+}
 
-   for(std::size_t xSize = minXDimension; xSize <= maxXDimension; size *= xStepSizeFactor) {
-      for(std::size_t ySize = minXDimension; ySize <= maxXDimension; size *= yStepSizeFactor) {
-         benchmark.setMetadataColumns( Benchmark::MetadataColumns({
-            { "xSize", convertToString(xSize) },
-            { "ySize", convertToString(ySize) }
+template<typename Real, typename Device>
+void HeatmapSolverBenchmark::runBenchmark(TNL::Benchmarks::Benchmark<>& benchmark,
+                                          const std::size_t minXDimension,
+                                          const std::size_t maxXDimension,
+                                          const std::size_t xStepSizeFactor,
+                                          const std::size_t minYDimension,
+                                          const std::size_t maxYDimension,
+                                          const std::size_t yStepSizeFactor,
+                                          const TNL::Config::ParameterContainer& parameters) const {
+   Real xDomainSize = parameters.getParameter<Real>("domain-x-size");
+   Real yDomainSize = parameters.getParameter<Real>("domain-y-size");
+   Real sigma = parameters.getParameter<Real>("sigma");
+   Real timeStep = parameters.getParameter<Real>("time-step");
+   Real finalTime = parameters.getParameter<Real>("final-time");
+
+   auto precision = TNL::getType<Real>(), device = TNL::getType<Device>();
+
+   std::cout << "Heatmap grid with (" + precision + ", host allocator " + device + ")" << std::endl;
+
+   for(std::size_t xSize = minXDimension; xSize <= maxXDimension; xSize *= xStepSizeFactor) {
+      for(std::size_t ySize = minXDimension; ySize <= maxXDimension; ySize *= yStepSizeFactor) {
+         benchmark.setMetadataColumns( TNL::Benchmarks::Benchmark<>::MetadataColumns({
+            { "precision", precision },
+            { "xSize", TNL::convertToString(xSize) },
+            { "ySize", TNL::convertToString(ySize) }
          }));
 
          auto lambda = [=]() {
-            HeatmapSolver::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false);
+            typename HeatmapSolver<Real>::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false);
 
-            exec(params);
+            exec<Real, Device>(params);
          };
 
-         benchmark.time<Device>()
+         benchmark.time<Device>("_", lambda);
       }
    }
 }
@@ -156,17 +163,23 @@ int main(int argc, char* argv[]) {
 
    std::ofstream logFile( logFileName.getString(), mode );
 
-   TNL::Benchmarks::Benchmark benchmark(loops, verbose);
+   TNL::Benchmarks::Benchmark<> benchmark(logFile, loops, verbose);
+
+   // write global metadata into a separate file
+   std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata();
+   TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
    if( precision == "all" || precision == "float" )
-      solver.runBenchmark<float>(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters);
+      solver.runBenchmark<float, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
    if( precision == "all" || precision == "double" )
-      solver.runBenchmark<double>(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters);
+      solver.runBenchmark<double, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
 
-   if(!benchmark.save(logFile)) {
-      std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
-      return EXIT_FAILURE;
-   }
+#ifdef HAVE_CUDA
+   if( precision == "all" || precision == "float" )
+      solver.runBenchmark<float, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+   if( precision == "all" || precision == "double" )
+      solver.runBenchmark<double, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+#endif
 
    return EXIT_SUCCESS;
 }
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
index d493b57b4..58c97ef0d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt
@@ -1,5 +1,2 @@
-if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cu )
-ELSE()
-   add_executable( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cpp )
-ENDIF()
+ADD_SUBDIRECTORY( benchmark )
+ADD_SUBDIRECTORY( solution )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..d95ce1fa7
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+ELSE()
+   add_executable( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cpp
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cpp
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cu
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cu
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h
new file mode 100644
index 000000000..24eb97da8
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h
@@ -0,0 +1,6 @@
+
+#include "../../Base/HeatmapSolver.h"
+#include "../../Base/HeatmapSolverBenchmark.h"
+#include "../implementation.h"
+
+#pragma once
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
new file mode 100644
index 000000000..7319b2aca
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu )
+ELSE()
+   add_executable( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h
similarity index 62%
rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h
index e6671e2e5..54d0c86f0 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h
@@ -6,8 +6,8 @@
 #include <array>
 #include <bitset>
 
-#include "../Base/HeatmapSolver.h"
-#include "implementation.h"
+#include "../../Base/HeatmapSolver.h"
+#include "../implementation.h"
 
 int main(int argc, char* argv[]) {
    using Real = double;
@@ -19,6 +19,9 @@ int main(int argc, char* argv[]) {
       return EXIT_FAILURE;
 
    auto device = parameters.getParameter<TNL::String>("device");
+
+   parameters.addParameter("outputData", true);
+
    auto params = HeatmapSolver<Real>::Parameters(parameters);
 
    HeatmapSolver<Real> solver;
@@ -33,22 +36,3 @@ int main(int argc, char* argv[]) {
 
    return EXIT_SUCCESS;
 }
-
-
-/*
-int main(int argc, char *argv[]) {
-   Grid<2, int, TNL::Devices::Cuda> grid;
-
-   grid.setDimensions(5, 5);
-
-   auto fn_entity = [=] __cuda_callable__ (GridEntity<2, int> entity) {
-      printf("%d %d %d\n", entity.getCoordinates()[0], entity.getCoordinates()[1], entity.getIndex());
-   };
-
-   Container<2, int> direction { 0, 0 };
-
-   grid.traverse({ 1, 1 }, { 4, 4 }, { direction }, fn_entity);
-
-   return 0;
-}
-*/
-- 
GitLab


From f1d13b09ad1ca00fbdbf68f7acfab4c979f61c7b Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 2 Dec 2021 09:26:12 +0100
Subject: [PATCH 26/29] Refactor parallel for solution to use benchmarks

---
 .../HeatmapNDimGrid/benchmark/CMakeLists.txt  |  4 +--
 .../HeatmapNDimGrid/solution/CMakeLists.txt   |  4 +--
 .../HeatmapParallelFor/CMakeLists.txt         |  7 ++--
 .../benchmark/CMakeLists.txt                  |  5 +++
 .../{ => benchmark}/main.cpp                  |  0
 .../{ => benchmark}/main.cu                   |  0
 .../HeatmapParallelFor/benchmark/main.h       |  5 +++
 .../{main.h => implementation.h}              | 28 ---------------
 .../solution/CMakeLists.txt                   |  5 +++
 .../HeatmapParallelFor/solution/main.cpp      |  2 ++
 .../HeatmapParallelFor/solution/main.cu       |  2 ++
 .../HeatmapParallelFor/solution/main.h        | 35 +++++++++++++++++++
 12 files changed, 60 insertions(+), 37 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt
 rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{ => benchmark}/main.cpp (100%)
 rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{ => benchmark}/main.cu (100%)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h
 rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{main.h => implementation.h} (76%)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
index d95ce1fa7..b52b3909d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+   CUDA_ADD_EXECUTABLE( heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
 ELSE()
-   add_executable( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+   add_executable( heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
index 7319b2aca..5ab54f05a 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu )
+   CUDA_ADD_EXECUTABLE( heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu )
 ELSE()
-   add_executable( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp )
+   add_executable( heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
index 51ee69d15..58c97ef0d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt
@@ -1,5 +1,2 @@
-if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cu )
-ELSE()
-   add_executable( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cpp )
-ENDIF()
+ADD_SUBDIRECTORY( benchmark )
+ADD_SUBDIRECTORY( solution )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..364b4a99f
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( heat_parallel_for_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+ELSE()
+   add_executable( heat_parallel_for_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cpp
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp
rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cpp
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cu
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu
rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cu
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h
new file mode 100644
index 000000000..64defc7d1
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h
@@ -0,0 +1,5 @@
+#include "../../Base/HeatmapSolver.h"
+#include "../../Base/HeatmapSolverBenchmark.h"
+#include "../implementation.h"
+
+#pragma once
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
similarity index 76%
rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
index 08658ea14..c8ad25eb9 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
@@ -1,7 +1,4 @@
 
-#include <iostream>
-#include <fstream>
-#include <TNL/Config/parseCommandLine.h>
 #include <TNL/Containers/Array.h>
 #include <TNL/Algorithms/ParallelFor.h>
 
@@ -89,28 +86,3 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    return writeGNUPlot("data_final.txt", params, ux);
 }
-
-int main(int argc, char *argv[]) {
-   using Real = double;
-
-   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
-
-   TNL::Config::ParameterContainer parameters;
-   if (!parseCommandLine(argc, argv, config, parameters))
-      return EXIT_FAILURE;
-
-   auto device = parameters.getParameter<TNL::String>("device");
-   auto params = HeatmapSolver<Real>::Parameters(parameters);
-
-   HeatmapSolver<Real> solver;
-
-   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
-      return EXIT_FAILURE;
-
-#ifdef HAVE_CUDA
-   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
-      return EXIT_FAILURE;
-#endif
-
-   return EXIT_SUCCESS;
-}
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
new file mode 100644
index 000000000..b134a4b22
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+ELSE()
+   add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h
new file mode 100644
index 000000000..c3192eb13
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h
@@ -0,0 +1,35 @@
+
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <type_traits>
+#include <array>
+#include <bitset>
+
+#include "../../Base/HeatmapSolver.h"
+#include "../implementation.h"
+
+int main(int argc, char *argv[]) {
+   using Real = double;
+
+   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
+
+   TNL::Config::ParameterContainer parameters;
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
+
+   auto device = parameters.getParameter<TNL::String>("device");
+   auto params = HeatmapSolver<Real>::Parameters(parameters);
+
+   HeatmapSolver<Real> solver;
+
+   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+      return EXIT_FAILURE;
+
+#ifdef HAVE_CUDA
+   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+      return EXIT_FAILURE;
+#endif
+
+   return EXIT_SUCCESS;
+}
-- 
GitLab


From 50f94102549b3712417c47a4a6d176dee57e6ff3 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 2 Dec 2021 15:51:40 +0100
Subject: [PATCH 27/29] Implement benchmarks for TNL Grid/ParallelFor

---
 .../HeatEquationGrid/Base/HeatmapSolver.h     |   2 +-
 .../Base/HeatmapSolverBenchmark.h             |  31 +++-
 .../HeatmapNDimGrid/implementation.h          |  40 +++--
 .../solution/CMakeLists.txt                   |   4 +-
 .../HeatmapParallelFor/solution/main.h        |   6 +-
 .../HeatmapTNLGrid/CMakeLists.txt             |   7 +-
 .../HeatmapTNLGrid/benchmark/CMakeLists.txt   |   5 +
 .../HeatmapTNLGrid/{ => benchmark}/main.cpp   |   0
 .../HeatmapTNLGrid/{ => benchmark}/main.cu    |   0
 .../HeatmapTNLGrid/benchmark/main.h           |   5 +
 .../HeatmapTNLGrid/implementation.h           |  77 ++++++++
 .../HeatEquationGrid/HeatmapTNLGrid/main.h    | 168 ------------------
 .../HeatmapTNLGrid/solution/CMakeLists.txt    |   5 +
 .../HeatmapTNLGrid/solution/main.cpp          |   2 +
 .../HeatmapTNLGrid/solution/main.cu           |   2 +
 .../HeatmapTNLGrid/solution/main.h            |  37 ++++
 16 files changed, 186 insertions(+), 205 deletions(-)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt
 rename src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/{ => benchmark}/main.cpp (100%)
 rename src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/{ => benchmark}/main.cu (100%)
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
 delete mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu
 create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h

diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
index 6974e06bc..9adfef757 100644
--- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h
@@ -54,7 +54,7 @@ TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig(
 {
    TNL::Config::ConfigDescription config;
 
-   config.addEntry<TNL::String>("device", "Device the computation will run on.", "host");
+   config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda");
    config.addEntryEnum<TNL::String>("host");
 
 #ifdef HAVE_CUDA
diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
index 6e9539f2a..a8b507aa4 100644
--- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
+++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h
@@ -37,6 +37,13 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() {
    config.addEntryEnum("append");
    config.addEntryEnum("overwrite");
 
+   config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda");
+   config.addEntryEnum<TNL::String>("host");
+
+#ifdef HAVE_CUDA
+   config.addEntryEnum<TNL::String>("cuda");
+#endif
+
    config.addEntry<TNL::String>("precision", "Precision of the arithmetics.", "double");
    config.addEntryEnum("float");
    config.addEntryEnum("double");
@@ -77,7 +84,7 @@ void HeatmapSolverBenchmark::exec(const typename HeatmapSolver<Real>::Parameters
    auto result = solver.template solve<Device>(params);
 
    if (!result)
-      std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl;
+      printf("Fail to solve for grid size (%d,%d)", params.xSize, params.ySize);
 }
 
 template<typename Real, typename Device>
@@ -169,16 +176,22 @@ int main(int argc, char* argv[]) {
    std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata();
    TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" );
 
-   if( precision == "all" || precision == "float" )
-      solver.runBenchmark<float, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
-   if( precision == "all" || precision == "double" )
-      solver.runBenchmark<double, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+   auto device = parameters.getParameter<TNL::String>("device");
+
+   if (device == "host") {
+      if(precision == "all" || precision == "float")
+         solver.runBenchmark<float, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+      if(precision == "all" || precision == "double")
+         solver.runBenchmark<double, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+   }
 
 #ifdef HAVE_CUDA
-   if( precision == "all" || precision == "float" )
-      solver.runBenchmark<float, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
-   if( precision == "all" || precision == "double" )
-      solver.runBenchmark<double, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+   if (device == "cuda") {
+      if( precision == "all" || precision == "float" )
+         solver.runBenchmark<float, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+      if( precision == "all" || precision == "double" )
+         solver.runBenchmark<double, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters);
+   }
 #endif
 
    return EXIT_SUCCESS;
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
index ed8ea004d..ce9361fac 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
@@ -204,10 +204,10 @@ class Grid {
          traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions);
 
          auto outerFunction = [=] __cuda_callable__(Index offset,
-                                                    const Container<Dimension, Index>& traverseRectOrigin,
-                                                    const Container<Dimension, Index>& traverseRectDimensions,
-                                                    const Container<Dimension, Index>& traverseRectDimensionsProducts,
-                                                    const Container<Dimension, Index>& dimensionsProducts,
+                                                    const Container<Dimension, Index> traverseRectOrigin,
+                                                    const Container<Dimension, Index> traverseRectDimensions,
+                                                    const Container<Dimension, Index> traverseRectDimensionsProducts,
+                                                    const Container<Dimension, Index> dimensionsProducts,
                                                     FunctionArgs... args) mutable {
             auto entity = this -> makeEntitity(offset,
                                                traverseRectOrigin, traverseRectDimensions,
@@ -237,8 +237,8 @@ class Grid {
        *
        *          For example, let's have a 3-d grid, then the map indexing will be the next:
        *            0 - 0 - count of vertices
-       *            1, 2, 3 - count of edges in z, y, x plane
-       *            4, 5, 6 - count of faces in yz, xz, xy plane
+       *            1, 2, 3 - count of edges in x, y, z plane
+       *            4, 5, 6 - count of faces in xy, yz, zy plane
        *            7 - count of cells in z y x plane
        *
        * @warning - The ordering of is lexigraphical.
@@ -268,7 +268,7 @@ class Grid {
                int result = 1;
 
                for (std::size_t k = 0; k < combinationBuffer.size(); k++)
-                  result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k];
+                  result *= combinationBuffer[k] ? dimensions[Dimension - k - 1] - 1 : dimensions[Dimension - k - 1];
 
                dimensionMap[j] = result;
                cumulativeDimensionMap[i] += result;
@@ -279,12 +279,13 @@ class Grid {
       }
 
       __cuda_callable__ inline
-      GridEntity<Dimension, Index> makeEntitity(const Index index,
+      GridEntity<Dimension, Index> makeEntitity(const Index& index,
                                                 const Container<Dimension, Index>& traverseRectOrigin,
                                                 const Container<Dimension, Index>& traverseRectDimensions,
                                                 const Container<Dimension, Index>& traverseRectDimensionsProducts,
                                                 const Container<Dimension, Index>& dimensionsProducts) const {
-         const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions);
+         //Container<Dimension, Index> traverseCoordinates = 0;
+         Container<Dimension, Index> traverseCoordinates = getCoordinates(index, traverseRectDimensions);
          Container<Dimension, Index> globalCoordinates = 0;
 
          for (Index i = 0; i < Dimension; i++)
@@ -298,22 +299,23 @@ class Grid {
        * Calculates position in the specific boundaries
        */
       __cuda_callable__ inline
-      Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const {
+      Container<Dimension, Index> getCoordinates(const Index& index,
+                                                 const Container<Dimension, Index> &dimensions) const {
          Container<Dimension, Index> coordinates = 0;
          Index tmpIndex = index;
 
-         Index dim = Dimension - 1;
+         Index dimensionIndex = 0;
 
          // TODO: -  Implement overflow check.
-         while (tmpIndex) {
-            Index dimension = dimensions[dim],
+         while (tmpIndex && dimensionIndex < Dimension) {
+            Index dimension = dimensions[dimensionIndex],
                   quotient = tmpIndex / dimension,
                   reminder = tmpIndex - (dimension * quotient);
 
-            coordinates[dim] = reminder;
+            coordinates[dimensionIndex] = reminder;
             tmpIndex = quotient;
 
-            dim -= 1;
+            dimensionIndex += 1;
          }
 
          return coordinates;
@@ -325,8 +327,10 @@ class Grid {
       Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept {
          Container<Dimension, Index> products = 0;
 
-         for (Index i = Dimension; i > 0; i--)
-            products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i];
+         products[0] = 1;
+
+         for (Index i = 1; i < Dimension; i++)
+            products[i] = dimensions[i - 1] * products[i - 1];
 
          return products;
       }
@@ -370,7 +374,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
    ux = 0;
    aux = 0;
 
-   auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable {
+   auto init = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable {
       auto position = entity.getCoordinates();
       auto index = entity.getIndex();
 
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
index b134a4b22..8cc2bdf69 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+   CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu )
 ELSE()
-   add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+   add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp )
 ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h
index c3192eb13..31c5ded3a 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h
@@ -18,16 +18,18 @@ int main(int argc, char *argv[]) {
    if (!parseCommandLine(argc, argv, config, parameters))
       return EXIT_FAILURE;
 
+   parameters.addParameter("outputData", true);
+
    auto device = parameters.getParameter<TNL::String>("device");
    auto params = HeatmapSolver<Real>::Parameters(parameters);
 
    HeatmapSolver<Real> solver;
 
-   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+   if (device == "host" && !solver.template solve<TNL::Devices::Host>(params))
       return EXIT_FAILURE;
 
 #ifdef HAVE_CUDA
-   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+   if (device == "cuda" && !solver.template solve<TNL::Devices::Cuda>(params))
       return EXIT_FAILURE;
 #endif
 
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
index 5672634e3..58c97ef0d 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt
@@ -1,5 +1,2 @@
-if (BUILD_CUDA)
-   CUDA_ADD_EXECUTABLE( benchmark_heat_tnl_grid main.h main.cu )
-ELSE()
-   add_executable( benchmark_heat_tnl_grid main.h main.cpp )
-ENDIF()
+ADD_SUBDIRECTORY( benchmark )
+ADD_SUBDIRECTORY( solution )
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..f8f5f4e5f
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( heat_tnl_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu )
+ELSE()
+   add_executable( heat_tnl_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cpp
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp
rename to src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cpp
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cu
similarity index 100%
rename from src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu
rename to src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cu
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h
new file mode 100644
index 000000000..64defc7d1
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h
@@ -0,0 +1,5 @@
+#include "../../Base/HeatmapSolver.h"
+#include "../../Base/HeatmapSolverBenchmark.h"
+#include "../implementation.h"
+
+#pragma once
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
new file mode 100644
index 000000000..be4d1a13d
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include "TNL/Meshes/GridDetails/Grid2D.h"
+#include "../Base/HeatmapSolver.h"
+
+template <typename Real>
+template <typename Device>
+bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters& params) const {
+   using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>;
+
+   Grid2D grid;
+
+   // Grid implementation defines its dimensions in the amount of edges.
+   // To align it size to all other benchmarks substract 1
+   grid.setDimensions(params.xSize - 1, params.ySize - 1);
+
+   // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well
+   int verticesCount = grid.template getEntitiesCount<0>();
+
+   TNL::Containers::Array<Real, Device> ux(verticesCount), // data at step u
+                                        aux(verticesCount);// data at step u + 1
+
+   // Invalidate ux/aux
+   ux = 0;
+   aux = 0;
+
+   const Real hx = params.xDomainSize / (Real)params.xSize;
+   const Real hy = params.yDomainSize / (Real)params.ySize;
+   const Real hx_inv = 1 / (hx * hx);
+   const Real hy_inv = 1 / (hy * hy);
+
+   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+
+   auto uxView = ux.getView(),
+        auxView = aux.getView();
+
+   auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable {
+      auto index = entity.getIndex();
+
+      auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2;
+      auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2;
+
+      uxView[index] = exp(params.sigma * (x * x + y * y));
+   };
+
+   grid.template forInterior<0>(init);
+
+   if (!writeGNUPlot("data.txt", params, ux))
+      return false;
+
+   auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
+      auto index = entity.getIndex();
+      auto width = grid.getDimensions().x() + 1;
+
+      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
+                        (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv;
+   };
+
+   auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
+      auto index = entity.getIndex();
+
+      uxView[index] += auxView[index] * timestep;
+   };
+
+   Real start = 0;
+
+   while (start < params.finalTime) {
+      grid.template forInterior<0>(next);
+      grid.template forInterior<0>(update);
+
+      start += timestep;
+   }
+
+   return writeGNUPlot("data_final.txt", params, ux);
+
+   return false;
+}
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
deleted file mode 100644
index 82b27213d..000000000
--- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h
+++ /dev/null
@@ -1,168 +0,0 @@
-
-#pragma once
-
-#include "TNL/Meshes/GridDetails/Grid2D.h"
-#include "../Base/HeatmapSolver.h"
-
-template <typename Real>
-template <typename Device>
-bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters& params) const {
-   using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>;
-
-   Grid2D grid;
-
-   // Grid implementation defines its dimensions in the amount of edges.
-   // To align it size to all other benchmarks substract 1
-   grid.setDimensions(params.xSize - 1, params.ySize - 1);
-
-   // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well
-   int verticesCount = grid.template getEntitiesCount<0>();
-
-   TNL::Containers::Array<Real, Device> ux(verticesCount), // data at step u
-                                        aux(verticesCount);// data at step u + 1
-
-   // Invalidate ux/aux
-   ux = 0;
-   aux = 0;
-
-   const Real hx = params.xDomainSize / (Real)params.xSize;
-   const Real hy = params.yDomainSize / (Real)params.ySize;
-   const Real hx_inv = 1 / (hx * hx);
-   const Real hy_inv = 1 / (hy * hy);
-
-   auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
-
-   auto uxView = ux.getView(),
-        auxView = aux.getView();
-
-   auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable {
-      auto index = entity.getIndex();
-
-      auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2;
-      auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2;
-
-      uxView[index] = exp(params.sigma * (x * x + y * y));
-   };
-
-   grid.template forInterior<0>(init);
-
-   if (!writeGNUPlot("data.txt", params, ux))
-      return false;
-
-   auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
-      auto index = entity.getIndex();
-      auto width = grid.getDimensions().x() + 1;
-
-      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                        (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv;
-   };
-
-   auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
-      auto index = entity.getIndex();
-
-      uxView[index] += auxView[index] * timestep;
-   };
-
-   Real start = 0;
-
-   while (start < params.finalTime) {
-      grid.template forInterior<0>(next);
-      grid.template forInterior<0>(update);
-
-      start += timestep;
-   }
-
-   return writeGNUPlot("data_final.txt", params, ux);
-
-   return false;
-}
-
-int main(int argc, char* argv[]) {
-   using Real = double;
-
-   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
-
-   TNL::Config::ParameterContainer parameters;
-   if (!parseCommandLine(argc, argv, config, parameters))
-      return EXIT_FAILURE;
-
-   auto device = parameters.getParameter<TNL::String>("device");
-   auto params = HeatmapSolver<Real>::Parameters(parameters);
-
-   HeatmapSolver<Real> solver;
-
-   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
-      return EXIT_FAILURE;
-
-#ifdef HAVE_CUDA
-   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
-      return EXIT_FAILURE;
-#endif
-
-   return EXIT_SUCCESS;
-}
-
-
-// TODO: - Move to tests
-/*
-using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>;
-using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>;
-
-void check1D() {
-   Grid1D grid;
-
-   grid.setDimensions({ 100 });
-
-   auto fn0 = [=] __cuda_callable__(const Grid1D::EntityType<0>&entity, int variant) mutable {
-      printf("%d %d\n", variant, entity.getCoordinates().x());
-      };
-
-   auto fn1 = [=] __cuda_callable__(const Grid1D::EntityType<1>&entity, int variant) mutable {
-      printf("%d %d\n", variant, entity.getCoordinates().x());
-   };
-
-   grid.forAll<0>(fn0, 0);
-   grid.forInterior<0>(fn0, 1);
-   grid.forBoundary<0>(fn0, 2);
-
-   grid.forAll<1>(fn1, 3);
-   grid.forInterior<1>(fn1, 4);
-   grid.forBoundary<1>(fn1, 5);
-}
-
-void check2D() {
-   Grid2D grid;
-
-   grid.setDimensions({ 10, 10 });
-
-   auto fn0 = [=] __cuda_callable__(const Grid2D::EntityType<0>&entity, int variant) mutable {
-      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
-   };
-
-   auto fn1 = [=] __cuda_callable__(const Grid2D::EntityType<1>&entity, int variant) mutable {
-      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
-   };
-
-   auto fn2 = [=] __cuda_callable__(const Grid2D::EntityType<2>&entity, int variant) mutable {
-      printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y());
-   };
-
-   std::cout << "Entities count of 0: dimension" << grid.getEntitiesCount<0>() << std::endl;
-
-   grid.forAll<0>(fn0, 0);
-   grid.forInterior<0>(fn0, 1);
-   grid.forBoundary<0>(fn0, 2);
-
-   grid.forAll<1>(fn1, 3);
-   grid.forInterior<1>(fn1, 4);
-   grid.forBoundary<1>(fn1, 5);
-
-   grid.forAll<2>(fn2, 6);
-   grid.forInterior<2>(fn2, 7);
-   grid.forBoundary<2>(fn2, 8);
-}
-
-int main(int argc, char *argv[]) {
-   return 0;
-}
-*/
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt
new file mode 100644
index 000000000..d5efbfc81
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (BUILD_CUDA)
+   CUDA_ADD_EXECUTABLE( heat_tnl_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu )
+ELSE()
+   add_executable( heat_tnl_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp )
+ENDIF()
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu
new file mode 100644
index 000000000..7a7a28956
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu
@@ -0,0 +1,2 @@
+
+#include "main.h"
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h
new file mode 100644
index 000000000..6d888d753
--- /dev/null
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h
@@ -0,0 +1,37 @@
+
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <type_traits>
+#include <array>
+#include <bitset>
+
+#include "../../Base/HeatmapSolver.h"
+#include "../implementation.h"
+
+int main(int argc, char *argv[]) {
+   using Real = double;
+
+   auto config = HeatmapSolver<Real>::Parameters::makeInputConfig();
+
+   TNL::Config::ParameterContainer parameters;
+   if (!parseCommandLine(argc, argv, config, parameters))
+      return EXIT_FAILURE;
+
+   parameters.addParameter("outputData", true);
+
+   auto device = parameters.getParameter<TNL::String>("device");
+   auto params = HeatmapSolver<Real>::Parameters(parameters);
+
+   HeatmapSolver<Real> solver;
+
+   if (device == "host" && !solver.solve<TNL::Devices::Host>(params))
+      return EXIT_FAILURE;
+
+#ifdef HAVE_CUDA
+   if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params))
+      return EXIT_FAILURE;
+#endif
+
+   return EXIT_SUCCESS;
+}
-- 
GitLab


From 66648e7333f2ffc74ab699700de32e327e012f83 Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 9 Dec 2021 14:27:41 +0100
Subject: [PATCH 28/29] Remove basis from the GridEntity

---
 src/Benchmarks/CMakeLists.txt                |  2 +-
 src/TNL/Meshes/GridDetails/Grid2D_impl.h     | 19 ++---
 src/TNL/Meshes/GridDetails/GridEntity_impl.h | 73 ++------------------
 src/TNL/Meshes/GridEntity.h                  | 32 +--------
 4 files changed, 13 insertions(+), 113 deletions(-)

diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt
index 6fa019081..985f4f162 100644
--- a/src/Benchmarks/CMakeLists.txt
+++ b/src/Benchmarks/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_subdirectory( HeatEquation )
+#add_subdirectory( HeatEquation )
 add_subdirectory( HeatEquationGrid )
 add_subdirectory( BLAS )
 add_subdirectory( NDArray )
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index 65da95107..d10c63811 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -438,9 +438,8 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
       auto outerOriented = [=] __cuda_callable__(Index i, Index j,
                                                  const Grid<2, Real, Device, Index>&grid,
                                                  const CoordinatesType & orientation,
-                                                 const CoordinatesType & basis,
                                                  FuncArgs... args) mutable {
-         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation);
 
          entity.refresh();
 
@@ -452,7 +451,6 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
                                                    outerOriented,
                                                    *this,
                                                    CoordinatesType(1, 0),
-                                                   CoordinatesType(0, 1),
                                                    args...);
 
       TNL::Algorithms::ParallelFor2D<Device>::exec(0, 0,
@@ -460,7 +458,6 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const {
                                                    outerOriented,
                                                    *this,
                                                    CoordinatesType(0, 1),
-                                                   CoordinatesType(1, 0),
                                                    args...);
       break;
    }
@@ -498,9 +495,8 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
       auto outerOriented = [=] __cuda_callable__(Index i, Index j,
                                                  Grid<2, Real, Device, Index>& grid,
                                                  const CoordinatesType& orientation,
-                                                 const CoordinatesType& basis,
                                                  FuncArgs... args) mutable {
-         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation, basis);
+         EntityType<EntityDimension> entity(grid, CoordinatesType(i, j), orientation);
 
          entity.refresh();
 
@@ -512,7 +508,6 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
                                                    outerOriented,
                                                    *this,
                                                    CoordinatesType(1, 0),
-                                                   CoordinatesType(0, 1),
                                                    args...);
 
       TNL::Algorithms::ParallelFor2D<Device>::exec(0, 1,
@@ -520,7 +515,6 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons
                                                    outerOriented,
                                                    *this,
                                                    CoordinatesType(0, 1),
-                                                   CoordinatesType(1, 0),
                                                    args...);
       break;
    }
@@ -576,8 +570,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
                                                  Index axis,
                                                  Index axisIndex,
                                                  const Grid<2, Real, Device, Index>& grid,
-                                                 const CoordinatesType & orientation,
-                                                 const CoordinatesType & basis,
+                                                 const CoordinatesType& orientation,
                                                  FuncArgs... args) mutable {
          CoordinatesType coordinates;
 
@@ -593,7 +586,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
          default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]");
          }
 
-         EntityType<EntityDimension> entity(grid, coordinates, orientation, basis);
+         EntityType<EntityDimension> entity(grid, coordinates, orientation);
 
          entity.refresh();
 
@@ -607,7 +600,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
                                                  0, 0,
                                                  *this,
                                                  CoordinatesType(1, 0),
-                                                 CoordinatesType(0, 1),
                                                  args...);
       // Upper horizontal
       TNL::Algorithms::ParallelFor<Device>::exec(0,
@@ -616,7 +608,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
                                                  0, dimensions.y(),
                                                  *this,
                                                  CoordinatesType(1, 0),
-                                                 CoordinatesType(0, 1),
                                                  args...);
       // Left vertical
       TNL::Algorithms::ParallelFor<Device>::exec(0,
@@ -625,7 +616,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
                                                  1, 0,
                                                  *this,
                                                  CoordinatesType(0, 1),
-                                                 CoordinatesType(1, 0),
                                                  args...);
       // Right vertical
       TNL::Algorithms::ParallelFor<Device>::exec(0,
@@ -634,7 +624,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons
                                                  1, dimensions.x(),
                                                  *this,
                                                  CoordinatesType(0, 1),
-                                                 CoordinatesType(1, 0),
                                                  args...);
       break;
    }
diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
index 922673de5..5136e6da7 100644
--- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
@@ -31,9 +31,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid )
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( 0 ),
-  orientation( 0 ),
-  basis( 0 )
-{
+  orientation( 0 ){
 }
 
 template< int Dimension,
@@ -46,14 +44,11 @@ __cuda_callable__ inline
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::
 GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid,
                const CoordinatesType& coordinates,
-               const EntityOrientationType& orientation,
-               const EntityBasisType& basis )
+               const EntityOrientationType& orientation )
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( coordinates ),
-  orientation( orientation ),
-  basis( basis )
-{
+  orientation( orientation ){
 }
 
 template< int Dimension,
@@ -159,37 +154,8 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con
 setOrientation( const EntityOrientationType& orientation )
 {
    this->orientation = orientation;
-   this->basis = EntityBasisType( 1 ) - abs( orientation );
-}
-
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          int EntityDimension,
-          typename Config >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::EntityBasisType&
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::
-getBasis() const
-{
-   return this->basis;
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          int EntityDimension,
-          typename Config >
-__cuda_callable__ inline
-void
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::
-setBasis( const EntityBasisType& basis )
-{
-   this->basis = basis;
-   this->orientation = EntityOrientationType( 1 ) - abs( basis );
-}
 
 template< int Dimension,
           typename Real,
@@ -270,8 +236,7 @@ __cuda_callable__ inline
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
 GridEntity( const GridType& grid,
                const CoordinatesType& coordinates,
-               const EntityOrientationType& orientation,
-               const EntityBasisType& basis )
+               const EntityOrientationType& orientation)
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( coordinates )
@@ -361,19 +326,6 @@ getOrientation() const
    return EntityOrientationType( ( IndexType ) 0 );
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          typename Config >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::EntityBasisType
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
-getBasis() const
-{
-   return EntityBasisType( ( IndexType ) 1 );
-}
-
 template< int Dimension,
           typename Real,
           typename Device,
@@ -458,7 +410,6 @@ GridEntity( const GridType& grid )
 {
 }
 
-// Basis is Discarded
 template< int Dimension,
           typename Real,
           typename Device,
@@ -468,8 +419,7 @@ __cuda_callable__ inline
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
 GridEntity( const GridType& grid,
                const CoordinatesType& coordinates,
-               const EntityOrientationType& orientation,
-               const EntityBasisType& basis )
+               const EntityOrientationType& orientation)
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( coordinates )
@@ -561,19 +511,6 @@ getOrientation() const
    return EntityOrientationType( ( IndexType ) 0 );
 }
 
-template< int Dimension,
-          typename Real,
-          typename Device,
-          typename Index,
-          typename Config >
-__cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::EntityBasisType
-GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
-getBasis() const
-{
-   return EntityBasisType( ( IndexType ) 0 );
-}
-
 template< int Dimension,
           typename Real,
           typename Device,
diff --git a/src/TNL/Meshes/GridEntity.h b/src/TNL/Meshes/GridEntity.h
index 19e13c531..05720681f 100644
--- a/src/TNL/Meshes/GridEntity.h
+++ b/src/TNL/Meshes/GridEntity.h
@@ -57,7 +57,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       constexpr static int getEntityDimension() { return EntityDimension; };
 
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
       typedef typename GridType::PointType PointType;
 
       template< int NeighborEntityDimension = getEntityDimension() >
@@ -74,8 +73,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       __cuda_callable__ inline
       GridEntity( const GridType& grid,
                      const CoordinatesType& coordinates,
-                     const EntityOrientationType& orientation,
-                     const EntityBasisType& basis );
+                     const EntityOrientationType& orientation);
 
       __cuda_callable__ inline
       const CoordinatesType& getCoordinates() const;
@@ -104,12 +102,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       __cuda_callable__ inline
       void setOrientation( const EntityOrientationType& orientation );
 
-      __cuda_callable__ inline
-      const EntityBasisType& getBasis() const;
-
-      __cuda_callable__ inline
-      void setBasis( const EntityBasisType& basis );
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
@@ -132,8 +124,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
 
       EntityOrientationType orientation;
 
-      EntityBasisType basis;
-
       //__cuda_callable__ inline
       //GridEntity();
 
@@ -167,7 +157,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       constexpr static int getEntityDimension() { return getMeshDimension(); };
 
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
 
       template< int NeighborEntityDimension = getEntityDimension() >
       using NeighborEntities =
@@ -184,8 +173,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       __cuda_callable__ inline
       GridEntity( const GridType& grid,
                   const CoordinatesType& coordinates,
-                  const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ),
-                  const EntityBasisType& basis = EntityBasisType( ( Index ) 1 ) );
+                  const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ) );
 
       __cuda_callable__ inline
       const CoordinatesType& getCoordinates() const;
@@ -214,12 +202,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       __cuda_callable__ inline
       void setOrientation( const EntityOrientationType& orientation ){};
 
-      __cuda_callable__ inline
-      const EntityBasisType getBasis() const;
-
-      __cuda_callable__ inline
-      void setBasis( const EntityBasisType& basis ){};
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
@@ -276,7 +258,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       constexpr static int getEntityDimension() { return 0; };
 
       typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
 
       template< int NeighborEntityDimension = getEntityDimension() >
       using NeighborEntities =
@@ -293,8 +274,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       __cuda_callable__ inline
       GridEntity( const GridType& grid,
                      const CoordinatesType& coordinates,
-                     const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ),
-                     const EntityBasisType& basis = EntityOrientationType( ( Index ) 0 ) );
+                     const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ) );
 
       __cuda_callable__ inline
       const CoordinatesType& getCoordinates() const;
@@ -323,12 +303,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       __cuda_callable__ inline
       void setOrientation( const EntityOrientationType& orientation ){};
 
-      __cuda_callable__ inline
-      const EntityBasisType getBasis() const;
-
-      __cuda_callable__ inline
-      void setBasis( const EntityBasisType& basis ){};
-
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
 
-- 
GitLab


From 82acde6411221738a8a16a450e7a86370a4f4d6e Mon Sep 17 00:00:00 2001
From: hayeuyur <hayeuyur@fit.cvut.cz>
Date: Thu, 9 Dec 2021 14:28:30 +0100
Subject: [PATCH 29/29] Optimization fixes for tnl-grid and parallel for

---
 .../HeatmapNDimGrid/implementation.h          | 18 ++++++++------
 .../HeatmapParallelFor/implementation.h       | 24 ++++++++++++-------
 .../HeatmapTNLGrid/implementation.h           | 21 +++++++++-------
 3 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
index ce9361fac..3a50efd47 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h
@@ -359,11 +359,14 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    const Real hx = params.xDomainSize / (Real)grid.getDimension(0);
    const Real hy = params.yDomainSize / (Real)grid.getDimension(1);
-   const Real hx_inv = 1 / (hx * hx);
-   const Real hy_inv = 1 / (hy * hy);
+   const Real hx_inv = 1. / (hx * hx);
+   const Real hy_inv = 1. / (hy * hy);
 
    auto entitiesCount = grid.getEntitiesCount(0);
    auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
+   auto xDomainSize = params.xDomainSize;
+   auto yDomainSize = params.yDomainSize;
+   auto sigma = params.sigma;
 
    TNL::Containers::Array<Real, Device> ux(entitiesCount), // data at step u
                                         aux(entitiesCount);// data at step u + 1
@@ -378,10 +381,10 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
       auto position = entity.getCoordinates();
       auto index = entity.getIndex();
 
-      auto x = position[0] * hx - params.xDomainSize / 2;
-      auto y = position[1] * hx - params.yDomainSize / 2;
+      auto x = position[0] * hx - xDomainSize / 2;
+      auto y = position[1] * hx - yDomainSize / 2;
 
-      uxView[index] = exp(params.sigma * (x * x + y * y));
+      uxView[index] = exp(sigma * (x * x + y * y));
    };
 
    const Container<2, bool> direction{ false, false };
@@ -398,9 +401,10 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable {
       auto index = entity.getIndex();
+      auto center = 2 * uxView[index];
 
-      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                       (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv;
+      auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv +
+                       (uxView[index - xDimension] - center + uxView[index + xDimension]) * hy_inv;
    };
 
    auto update = [=] __cuda_callable__(int i) mutable {
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
index c8ad25eb9..c16936090 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h
@@ -39,21 +39,26 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    const Real hx = params.xDomainSize / (Real)params.xSize;
    const Real hy = params.yDomainSize / (Real)params.ySize;
-   const Real hx_inv = 1 / (hx * hx);
-   const Real hy_inv = 1 / (hy * hy);
+   const Real hx_inv = 1. / (hx * hx);
+   const Real hy_inv = 1. / (hy * hy);
 
    auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
 
    auto uxView = ux.getView(), auxView = aux.getView();
 
+   auto xSize = params.xSize;
+   auto xDomainSize = params.xDomainSize;
+   auto yDomainSize = params.yDomainSize;
+   auto sigma = params.sigma;
+
    auto init = [=] __cuda_callable__(int i, int j) mutable
    {
-      auto index = j * params.xSize + i;
+      auto index = j * xSize + i;
 
-      auto x = i * hx - params.xDomainSize / 2;
-      auto y = j * hy - params.yDomainSize / 2;
+      auto x = i * hx - xDomainSize / 2.;
+      auto y = j * hy - yDomainSize / 2.;
 
-      uxView[index] = exp(params.sigma * (x * x + y * y));
+      uxView[index] = exp(sigma * (x * x + y * y));
    };
 
    TNL::Algorithms::ParallelFor2D<Device>::exec(1, 1, params.xSize - 1, params.ySize - 1, init);
@@ -63,10 +68,11 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters &params) c
 
    auto next = [=] __cuda_callable__(int i, int j) mutable
    {
-      auto index = j * params.xSize + i;
+      auto index = j * xSize + i;
+      auto center = 2 * uxView[index];
 
-      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                       (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv;
+      auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv +
+                       (uxView[index - xSize] - center + uxView[index + xSize]) * hy_inv;
    };
 
    auto update = [=] __cuda_callable__(int i) mutable
diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
index be4d1a13d..f50b9546a 100644
--- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
+++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h
@@ -26,21 +26,25 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters& params) c
 
    const Real hx = params.xDomainSize / (Real)params.xSize;
    const Real hy = params.yDomainSize / (Real)params.ySize;
-   const Real hx_inv = 1 / (hx * hx);
-   const Real hy_inv = 1 / (hy * hy);
+   const Real hx_inv = 1. / (hx * hx);
+   const Real hy_inv = 1. / (hy * hy);
 
    auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy);
 
    auto uxView = ux.getView(),
         auxView = aux.getView();
 
+   auto xDomainSize = params.xDomainSize;
+   auto yDomainSize = params.yDomainSize;
+   auto sigma = params.sigma;
+
    auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable {
       auto index = entity.getIndex();
 
-      auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2;
-      auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2;
+      auto x = entity.getCoordinates().x() * hx - xDomainSize / 2.;
+      auto y = entity.getCoordinates().y() * hy - yDomainSize / 2.;
 
-      uxView[index] = exp(params.sigma * (x * x + y * y));
+      uxView[index] = exp(sigma * (x * x + y * y));
    };
 
    grid.template forInterior<0>(init);
@@ -48,12 +52,13 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters& params) c
    if (!writeGNUPlot("data.txt", params, ux))
       return false;
 
+   auto width = grid.getDimensions().x() + 1;
    auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
       auto index = entity.getIndex();
-      auto width = grid.getDimensions().x() + 1;
+      auto center = 2 * uxView[index];
 
-      auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv +
-                        (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv;
+      auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv +
+                        (uxView[index - width] - center + uxView[index + width]) * hy_inv;
    };
 
    auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable {
-- 
GitLab