From 8ab1f885ccfa07b85b820c16874cf775927ea585 Mon Sep 17 00:00:00 2001 From: Hayeu Yury Date: Mon, 18 Oct 2021 10:13:11 +0200 Subject: [PATCH 01/29] Add Docker image as development environment --- Dockerfile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..246faf448 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 + +# Fix timezone +ENV TZ=Europe/Prague +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN apt-get update +RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev +RUN apt-get install -y --no-install-recommends git openssh-client subversion procps +RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db +RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget + +RUN git config --global alias.ll 'log --oneline --graph --all --decorate' +RUN git config --global core.editor 'vim' + +WORKDIR /workspace -- GitLab From 27d0c8be4a77f64765fa282062de405b83b6ce6e Mon Sep 17 00:00:00 2001 From: Hayeu Yury Date: Mon, 18 Oct 2021 13:09:55 +0200 Subject: [PATCH 02/29] Basic implementation of the HeatEquasion for host device --- src/Benchmarks/CMakeLists.txt | 1 + .../HeatEquationGrid/CMakeLists.txt | 1 + .../HeatmapParallelFor/CMakeLists.txt | 1 + .../HeatmapParallelFor/main.cpp | 87 +++++++++++++++++++ .../HeatmapParallelFor/main.cu | 0 .../HeatmapParallelFor/main.h | 42 +++++++++ src/Benchmarks/HeatEquationGrid/README.md | 2 + 7 files changed, 134 insertions(+) create mode 100644 src/Benchmarks/HeatEquationGrid/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h create mode 100644 src/Benchmarks/HeatEquationGrid/README.md diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt index 0fc8e0f02..6fa019081 100644 --- a/src/Benchmarks/CMakeLists.txt +++ b/src/Benchmarks/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory( HeatEquation ) +add_subdirectory( HeatEquationGrid ) add_subdirectory( BLAS ) add_subdirectory( NDArray ) add_subdirectory( SpMV ) diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt new file mode 100644 index 000000000..867619052 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt @@ -0,0 +1 @@ +ADD_SUBDIRECTORY( HeatmapParallelFor ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt new file mode 100644 index 000000000..35fbf34c9 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt @@ -0,0 +1 @@ +add_executable( benchmark_heat_parallel_for main.h main.cpp ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp new file mode 100644 index 000000000..2f4d61512 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp @@ -0,0 +1,87 @@ +#include "main.h" + +TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() +{ + TNL::Config::ConfigDescription config; + + config.addEntry("device", "Device the computation will run on.", "host"); + config.addEntryEnum("host"); + +#ifdef HAVE_CUDA + config.addEntryEnum("cuda"); +#endif + + config.addEntry("grid-x-size", "Grid size along x-axis.", 100); + config.addEntry("grid-y-size", "Grid size along y-axis.", 100); + + config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); + config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); + + config.addEntry("sigma", "Sigma in exponential initial condition.", 2.0); + + config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); + config.addEntry("final-time", "Final time of the simulation.", 1.0); + config.addEntry("verbose", "Verbose mode.", true); + + return config; +} + +HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer& parameters): + xSize(parameters.getParameter("grid-x-size")), + ySize(parameters.getParameter("grid-y-size")), + xDomainSize(parameters.getParameter("domain-x-size")), + yDomainSize(parameters.getParameter("domain-y-size")), + sigma(parameters.getParameter("sigma")), + timeStep(parameters.getParameter("time-step")), + finalTime(parameters.getParameter("final-time")), + verbose(parameters.getParameter("verbose")) {} + +bool HeatmapSolver::Solve(const HeatmapSolver::Parameters& params, TNL::Timer& timer) const { + // This is always an external storage for grid. + TNL::Container::Array ux(params.xSize * params.ySize), // data at step u + aux(params.xSize * params.ySize); // data at step u + 1 + + // Invalidate ux/aux + ux = 0; + aux = 0; + + const double hx = params.xDomainSize / (double)params.xSize; + const double hy = params.yDomainSize / (double)params.ySize; + + auto uxView = ux.getView(), auxView = aux.getView(); + + timer.reset(); + + // TODO: - Initial Condition + + + auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) { + aux[i] = 0; + aux[(params.ySize - 1) * params.xSize + i] = 0; + }; + + auto verticalBoundaryCondition = [=] __cuda_callable__(int i) { + aux[j * params.ySize] = 0; + aux[j * params.ySize + params.xSize - 1] = 0; + }; + + auto next = [=] __cuda_callable__(int i, int j) { + auto index = j * params.ySize + i; + + aux[index] = (u[c - 1] - 2 * u[c] + u[c + 1]) / hx + + (u[c - params.xSize] - 2 * u[c] + u[c + params.xSize]) / hy; + }; + + double time = 0; + + while (time < params.finalTime) { + // TODO: - Do we really need this + TNL::Algorithm::ParallelFor(0, params.xSize, horizontalBoundaryCondition); + TNL::Algorithm::ParallelFor(0, params.ySize, verticalBoundaryCondition); + TNL::Algorithm::ParallelFor2D(1, 1, params.xSize - 1, params.ySize - 1, next); + + time += params.timeStep; + } + + return false; +} diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu new file mode 100644 index 000000000..e69de29bb diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h new file mode 100644 index 000000000..8734f4d2a --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -0,0 +1,42 @@ + +#include +#include +#include +#include +#include + +#pragma once + +class HeatmapSolver { + public: + class Parameters { + public: + const int xSize, ySize; + const double xDomainSize, yDomainSize; + const double sigma; + const double timeStep, finalTime; + const bool verbose; + + Parameters(const TNL::Config::ParameterContainer& parameters); + + static TNL::Config::ConfigDescription makeInputConfig(); + private: + }; + + bool solve(const Parameters ¶meters, TNL::Timer &timer) const; + + private: +}; + +int main(int argc, char * argv[]) { + TNL::Timer timer; + auto config = HeatmapSolver::Parameters::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + auto params = HeatmapSolver::Parameters(parameters); + + return 0; +} diff --git a/src/Benchmarks/HeatEquationGrid/README.md b/src/Benchmarks/HeatEquationGrid/README.md new file mode 100644 index 000000000..e5ba7a298 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/README.md @@ -0,0 +1,2 @@ +This is a temporary folder for grid refactoring testing. + -- GitLab From 8379a52cdc65721e292afe39c8530a9615d484a9 Mon Sep 17 00:00:00 2001 From: Hayeu Yury Date: Wed, 20 Oct 2021 17:14:02 +0200 Subject: [PATCH 03/29] Add device support to implementation --- Dockerfile | 2 +- .../HeatmapParallelFor/main.cpp | 87 +------- .../HeatmapParallelFor/main.cu | 2 + .../HeatmapParallelFor/main.h | 191 ++++++++++++++++-- 4 files changed, 180 insertions(+), 102 deletions(-) diff --git a/Dockerfile b/Dockerfile index 246faf448..e71c1e1b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev RUN apt-get install -y --no-install-recommends git openssh-client subversion procps -RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db +RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db gnuplot RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget RUN git config --global alias.ll 'log --oneline --graph --all --decorate' diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp index 2f4d61512..7a7a28956 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp @@ -1,87 +1,2 @@ -#include "main.h" - -TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() -{ - TNL::Config::ConfigDescription config; - - config.addEntry("device", "Device the computation will run on.", "host"); - config.addEntryEnum("host"); - -#ifdef HAVE_CUDA - config.addEntryEnum("cuda"); -#endif - - config.addEntry("grid-x-size", "Grid size along x-axis.", 100); - config.addEntry("grid-y-size", "Grid size along y-axis.", 100); - - config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); - config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); - - config.addEntry("sigma", "Sigma in exponential initial condition.", 2.0); - - config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); - config.addEntry("final-time", "Final time of the simulation.", 1.0); - config.addEntry("verbose", "Verbose mode.", true); - - return config; -} - -HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer& parameters): - xSize(parameters.getParameter("grid-x-size")), - ySize(parameters.getParameter("grid-y-size")), - xDomainSize(parameters.getParameter("domain-x-size")), - yDomainSize(parameters.getParameter("domain-y-size")), - sigma(parameters.getParameter("sigma")), - timeStep(parameters.getParameter("time-step")), - finalTime(parameters.getParameter("final-time")), - verbose(parameters.getParameter("verbose")) {} - -bool HeatmapSolver::Solve(const HeatmapSolver::Parameters& params, TNL::Timer& timer) const { - // This is always an external storage for grid. - TNL::Container::Array ux(params.xSize * params.ySize), // data at step u - aux(params.xSize * params.ySize); // data at step u + 1 - - // Invalidate ux/aux - ux = 0; - aux = 0; - - const double hx = params.xDomainSize / (double)params.xSize; - const double hy = params.yDomainSize / (double)params.ySize; - auto uxView = ux.getView(), auxView = aux.getView(); - - timer.reset(); - - // TODO: - Initial Condition - - - auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) { - aux[i] = 0; - aux[(params.ySize - 1) * params.xSize + i] = 0; - }; - - auto verticalBoundaryCondition = [=] __cuda_callable__(int i) { - aux[j * params.ySize] = 0; - aux[j * params.ySize + params.xSize - 1] = 0; - }; - - auto next = [=] __cuda_callable__(int i, int j) { - auto index = j * params.ySize + i; - - aux[index] = (u[c - 1] - 2 * u[c] + u[c + 1]) / hx + - (u[c - params.xSize] - 2 * u[c] + u[c + params.xSize]) / hy; - }; - - double time = 0; - - while (time < params.finalTime) { - // TODO: - Do we really need this - TNL::Algorithm::ParallelFor(0, params.xSize, horizontalBoundaryCondition); - TNL::Algorithm::ParallelFor(0, params.ySize, verticalBoundaryCondition); - TNL::Algorithm::ParallelFor2D(1, 1, params.xSize - 1, params.ySize - 1, next); - - time += params.timeStep; - } - - return false; -} +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu index e69de29bb..7a7a28956 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h index 8734f4d2a..5aefc49f4 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -1,42 +1,203 @@ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #pragma once +template class HeatmapSolver { public: class Parameters { public: const int xSize, ySize; - const double xDomainSize, yDomainSize; - const double sigma; - const double timeStep, finalTime; + const Real xDomainSize, yDomainSize; + const Real sigma; + const Real timeStep, finalTime; const bool verbose; - Parameters(const TNL::Config::ParameterContainer& parameters); + Parameters(const TNL::Config::ParameterContainer ¶meters); static TNL::Config::ConfigDescription makeInputConfig(); - private: }; - bool solve(const Parameters ¶meters, TNL::Timer &timer) const; + template + bool solve(const Parameters ¶meters) const; private: + template + bool writeGNUPlot(const std::string &filename, + const Parameters& parameters, + const TNL::Containers::Array& map) const; }; +template +TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() +{ + TNL::Config::ConfigDescription config; + + config.addEntry("device", "Device the computation will run on.", "host"); + config.addEntryEnum("host"); + +#ifdef HAVE_CUDA + config.addEntryEnum("cuda"); +#endif + + config.addEntry("grid-x-size", "Grid size along x-axis.", 100); + config.addEntry("grid-y-size", "Grid size along y-axis.", 100); + + config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); + config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); + + config.addEntry("sigma", "Sigma in exponential initial condition.", 2.0); + + config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); + config.addEntry("final-time", "Final time of the simulation.", 1.0); + config.addEntry("verbose", "Verbose mode.", true); + + return config; +} + +template +HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer ¶meters) : xSize(parameters.getParameter("grid-x-size")), + ySize(parameters.getParameter("grid-y-size")), + xDomainSize(parameters.getParameter("domain-x-size")), + yDomainSize(parameters.getParameter("domain-y-size")), + sigma(parameters.getParameter("sigma")), + timeStep(parameters.getParameter("time-step")), + finalTime(parameters.getParameter("final-time")), + verbose(parameters.getParameter("verbose")) {} + +/*** + * + * ySize|j (ySize - 1) * xSize + xSize - 1 + * |------------------------------------------------------ + * | + * | + * | + * | + * | + * | + * | + * | + * | + * |------------------------------------------------------> + * + * 0 xSize|i + * + * j * xSize + i + ***/ +template +template +bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const +{ + // This is always an external storage for grid. + TNL::Containers::Array ux(params.xSize * params.ySize), // data at step u + aux(params.xSize * params.ySize); // data at step u + 1 + + // Invalidate ux/aux + ux = 0; + aux = 0; + + const Real hx = params.xDomainSize / (Real)params.xSize; + const Real hy = params.yDomainSize / (Real)params.ySize; + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); + + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + + auto uxView = ux.getView(), + auxView = aux.getView(); + + auto init = [=] __cuda_callable__(int i, int j) mutable { + auto index = j * params.xSize + i; + + uxView[index] = exp(-params.sigma * (i * i + j * j)); + }; + + TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, init); + + if (!writeGNUPlot("data.txt", params, ux)) + return false; + + // auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) mutable { + // auxView[i] = 0; + // auxView[(params.ySize - 1) * params.xSize + i] = 0; + // }; + + // auto verticalBoundaryCondition = [=] __cuda_callable__(int j) mutable { + // auxView[j * params.xSize] = 0; + // auxView[j * params.xSize + params.xSize - 1] = 0; + // }; + + // TNL::Algorithms::ParallelFor::exec(0, params.xSize, horizontalBoundaryCondition); + // TNL::Algorithms::ParallelFor::exec(0, params.ySize, verticalBoundaryCondition); + + auto next = [=] __cuda_callable__(int i, int j) mutable { + auto index = j * params.ySize + i; + + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv; + }; + + auto update = [=] __cuda_callable__(int i) mutable { + uxView[i] += auxView[i] * timestep; + }; + + Real start = 0; + + while (start < params.finalTime) + { + TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, next); + TNL::Algorithms::ParallelFor::exec(0, params.xSize * params.ySize, update); + + std::cout << "Iteration: " << start << std::endl; + start += timestep; + } + + return writeGNUPlot("data_final.txt", params, ux); +} + +template +template +bool HeatmapSolver::writeGNUPlot(const std::string &filename, + const HeatmapSolver::Parameters ¶ms, + const TNL::Containers::Array &map) const { + std::ofstream out(filename, std::ios::out); + + if (!out.is_open()) + return false; + + for (int j = 0; j < params.ySize; j++) + for (int i = 0; i < params.xSize; i++) + out << i << " " << j << " " << map[j * params.xSize + i] << std::endl; + + return out.good(); +} + int main(int argc, char * argv[]) { - TNL::Timer timer; - auto config = HeatmapSolver::Parameters::makeInputConfig(); + using Real = double; + + auto config = HeatmapSolver::Parameters::makeInputConfig(); TNL::Config::ParameterContainer parameters; if (!parseCommandLine(argc, argv, config, parameters)) return EXIT_FAILURE; - auto params = HeatmapSolver::Parameters(parameters); + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); + + HeatmapSolver solver; + + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; + +#ifdef HAVE_CUDA + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; +#endif - return 0; + return EXIT_SUCCESS; } -- GitLab From a3d3f84f433b774b5792e171335d33d48fc2a43a Mon Sep 17 00:00:00 2001 From: Hayeu Yury Date: Wed, 20 Oct 2021 21:56:56 +0200 Subject: [PATCH 04/29] Implement fixes to the initial condition --- .../HeatmapParallelFor/CMakeLists.txt | 6 ++++- .../HeatmapParallelFor/main.h | 27 ++++++------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt index 35fbf34c9..80f43845f 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt @@ -1 +1,5 @@ -add_executable( benchmark_heat_parallel_for main.h main.cpp ) +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu ) +ELSE() + add_executable( benchmark_heat_parallel_for main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h index 5aefc49f4..bc6e47abc 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -51,10 +51,10 @@ TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig( config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); - config.addEntry("sigma", "Sigma in exponential initial condition.", 2.0); + config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); - config.addEntry("final-time", "Final time of the simulation.", 1.0); + config.addEntry("final-time", "Final time of the simulation.", 0.012); config.addEntry("verbose", "Verbose mode.", true); return config; @@ -71,6 +71,7 @@ HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContaine verbose(parameters.getParameter("verbose")) {} /*** + * Grid parameters: * * ySize|j (ySize - 1) * xSize + xSize - 1 * |------------------------------------------------------ @@ -108,13 +109,15 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); - auto uxView = ux.getView(), - auxView = aux.getView(); + auto uxView = ux.getView(), auxView = aux.getView(); auto init = [=] __cuda_callable__(int i, int j) mutable { auto index = j * params.xSize + i; - uxView[index] = exp(-params.sigma * (i * i + j * j)); + auto x = i * hx - params.xDomainSize / 2; + auto y = j * hy - params.yDomainSize / 2; + + uxView[index] = exp(params.sigma * (x * x + y * y)); }; TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, init); @@ -122,19 +125,6 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c if (!writeGNUPlot("data.txt", params, ux)) return false; - // auto horizontalBoundaryCondition = [=] __cuda_callable__ (int i) mutable { - // auxView[i] = 0; - // auxView[(params.ySize - 1) * params.xSize + i] = 0; - // }; - - // auto verticalBoundaryCondition = [=] __cuda_callable__(int j) mutable { - // auxView[j * params.xSize] = 0; - // auxView[j * params.xSize + params.xSize - 1] = 0; - // }; - - // TNL::Algorithms::ParallelFor::exec(0, params.xSize, horizontalBoundaryCondition); - // TNL::Algorithms::ParallelFor::exec(0, params.ySize, verticalBoundaryCondition); - auto next = [=] __cuda_callable__(int i, int j) mutable { auto index = j * params.ySize + i; @@ -153,7 +143,6 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, next); TNL::Algorithms::ParallelFor::exec(0, params.xSize * params.ySize, update); - std::cout << "Iteration: " << start << std::endl; start += timestep; } -- GitLab From 5b772f3cd885760f9a06df29f8a1d7d80c60b7b7 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 21 Oct 2021 08:32:06 +0200 Subject: [PATCH 05/29] Fix code alignment. Remove Dockerfile --- Dockerfile | 16 -- .../HeatmapParallelFor/CMakeLists.txt | 4 +- .../HeatmapParallelFor/main.h | 219 +++++++++--------- 3 files changed, 115 insertions(+), 124 deletions(-) delete mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index e71c1e1b6..000000000 --- a/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 - -# Fix timezone -ENV TZ=Europe/Prague -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN apt-get update -RUN apt-get install -y --no-install-recommends g++ gcc make cmake zlib1g-dev libjpeg-dev libpng-dev libtinyxml2-dev -RUN apt-get install -y --no-install-recommends git openssh-client subversion procps -RUN apt-get install -y --no-install-recommends vim valgrind gpg man-db gnuplot -RUN apt-get install -y --no-install-recommends zsh ca-certificates curl netbase wget - -RUN git config --global alias.ll 'log --oneline --graph --all --decorate' -RUN git config --global core.editor 'vim' - -WORKDIR /workspace diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt index 80f43845f..054a28cf8 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu ) + CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu ) ELSE() - add_executable( benchmark_heat_parallel_for main.h main.cpp ) + add_executable( benchmark_heat_parallel_for main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h index bc6e47abc..311fc94e5 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -1,66 +1,68 @@ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #pragma once -template -class HeatmapSolver { - public: - class Parameters { - public: - const int xSize, ySize; - const Real xDomainSize, yDomainSize; - const Real sigma; - const Real timeStep, finalTime; - const bool verbose; - - Parameters(const TNL::Config::ParameterContainer ¶meters); - - static TNL::Config::ConfigDescription makeInputConfig(); - }; - - template - bool solve(const Parameters ¶meters) const; - - private: - template - bool writeGNUPlot(const std::string &filename, - const Parameters& parameters, - const TNL::Containers::Array& map) const; +template +class HeatmapSolver +{ +public: + class Parameters + { + public: + const int xSize, ySize; + const Real xDomainSize, yDomainSize; + const Real sigma; + const Real timeStep, finalTime; + const bool verbose; + + Parameters(const TNL::Config::ParameterContainer ¶meters); + + static TNL::Config::ConfigDescription makeInputConfig(); + }; + + template + bool solve(const Parameters ¶meters) const; + +private: + template + bool writeGNUPlot(const std::string &filename, + const Parameters ¶meters, + const TNL::Containers::Array &map) const; }; template TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() { - TNL::Config::ConfigDescription config; + TNL::Config::ConfigDescription config; - config.addEntry("device", "Device the computation will run on.", "host"); - config.addEntryEnum("host"); + config.addEntry("device", "Device the computation will run on.", "host"); + config.addEntryEnum("host"); #ifdef HAVE_CUDA - config.addEntryEnum("cuda"); + config.addEntryEnum("cuda"); #endif - config.addEntry("grid-x-size", "Grid size along x-axis.", 100); - config.addEntry("grid-y-size", "Grid size along y-axis.", 100); + config.addEntry("grid-x-size", "Grid size along x-axis.", 100); + config.addEntry("grid-y-size", "Grid size along y-axis.", 100); - config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); - config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); + config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); + config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); - config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); + config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); - config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); - config.addEntry("final-time", "Final time of the simulation.", 0.012); - config.addEntry("verbose", "Verbose mode.", true); + config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); + config.addEntry("final-time", "Final time of the simulation.", 0.012); + config.addEntry("verbose", "Verbose mode.", true); - return config; + return config; } -template +template HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer ¶meters) : xSize(parameters.getParameter("grid-x-size")), ySize(parameters.getParameter("grid-y-size")), xDomainSize(parameters.getParameter("domain-x-size")), @@ -73,7 +75,7 @@ HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContaine /*** * Grid parameters: * - * ySize|j (ySize - 1) * xSize + xSize - 1 + * ySize-1|j (ySize - 1) * xSize + xSize - 1 * |------------------------------------------------------ * | * | @@ -86,107 +88,112 @@ HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContaine * | * |------------------------------------------------------> * - * 0 xSize|i + * 0 xSize-1|i * * j * xSize + i ***/ -template -template +template +template bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const { - // This is always an external storage for grid. - TNL::Containers::Array ux(params.xSize * params.ySize), // data at step u - aux(params.xSize * params.ySize); // data at step u + 1 + // This is always an external storage for grid. + TNL::Containers::Array ux(params.xSize * params.ySize), // data at step u + aux(params.xSize * params.ySize);// data at step u + 1 - // Invalidate ux/aux - ux = 0; - aux = 0; + // Invalidate ux/aux + ux = 0; + aux = 0; - const Real hx = params.xDomainSize / (Real)params.xSize; - const Real hy = params.yDomainSize / (Real)params.ySize; - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); + const Real hx = params.xDomainSize / (Real)params.xSize; + const Real hy = params.yDomainSize / (Real)params.ySize; + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); - auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); - auto uxView = ux.getView(), auxView = aux.getView(); + auto uxView = ux.getView(), auxView = aux.getView(); - auto init = [=] __cuda_callable__(int i, int j) mutable { - auto index = j * params.xSize + i; + auto init = [=] __cuda_callable__(int i, int j) mutable + { + auto index = j * params.xSize + i; - auto x = i * hx - params.xDomainSize / 2; - auto y = j * hy - params.yDomainSize / 2; + auto x = i * hx - params.xDomainSize / 2; + auto y = j * hy - params.yDomainSize / 2; - uxView[index] = exp(params.sigma * (x * x + y * y)); - }; + uxView[index] = exp(params.sigma * (x * x + y * y)); + }; - TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, init); + TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, init); - if (!writeGNUPlot("data.txt", params, ux)) - return false; + if (!writeGNUPlot("data.txt", params, ux)) + return false; - auto next = [=] __cuda_callable__(int i, int j) mutable { - auto index = j * params.ySize + i; + auto next = [=] __cuda_callable__(int i, int j) mutable + { + auto index = j * params.ySize + i; - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv; - }; + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv; + }; - auto update = [=] __cuda_callable__(int i) mutable { - uxView[i] += auxView[i] * timestep; - }; + auto update = [=] __cuda_callable__(int i) mutable + { + uxView[i] += auxView[i] * timestep; + }; - Real start = 0; + Real start = 0; - while (start < params.finalTime) - { - TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, next); - TNL::Algorithms::ParallelFor::exec(0, params.xSize * params.ySize, update); + while (start < params.finalTime) + { + TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, next); + TNL::Algorithms::ParallelFor::exec(0, params.xSize * params.ySize, update); - start += timestep; - } + start += timestep; + } - return writeGNUPlot("data_final.txt", params, ux); + return writeGNUPlot("data_final.txt", params, ux); } template template bool HeatmapSolver::writeGNUPlot(const std::string &filename, const HeatmapSolver::Parameters ¶ms, - const TNL::Containers::Array &map) const { - std::ofstream out(filename, std::ios::out); + const TNL::Containers::Array &map) const +{ + std::ofstream out(filename, std::ios::out); - if (!out.is_open()) - return false; + if (!out.is_open()) + return false; - for (int j = 0; j < params.ySize; j++) - for (int i = 0; i < params.xSize; i++) - out << i << " " << j << " " << map[j * params.xSize + i] << std::endl; + for (int j = 0; j < params.ySize; j++) + for (int i = 0; i < params.xSize; i++) + out << i << " " << j << " " << map[j * params.xSize + i] << std::endl; - return out.good(); + return out.good(); } -int main(int argc, char * argv[]) { - using Real = double; +int main(int argc, char *argv[]) +{ + using Real = double; - auto config = HeatmapSolver::Parameters::makeInputConfig(); + auto config = HeatmapSolver::Parameters::makeInputConfig(); - TNL::Config::ParameterContainer parameters; - if (!parseCommandLine(argc, argv, config, parameters)) - return EXIT_FAILURE; + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; - auto device = parameters.getParameter("device"); - auto params = HeatmapSolver::Parameters(parameters); + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); - HeatmapSolver solver; + HeatmapSolver solver; - if (device == "host" && !solver.solve(params)) - return EXIT_FAILURE; + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; #ifdef HAVE_CUDA - if (device == "cuda" && !solver.solve(params)) - return EXIT_FAILURE; + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } -- GitLab From c2a06a20975193327281fcdf7c58398135d937b8 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Tue, 2 Nov 2021 18:33:13 +0100 Subject: [PATCH 06/29] Implement dimension set/get --- src/Benchmarks/CMakeLists.txt | 21 ++++- .../HeatEquationGrid/CMakeLists.txt | 3 +- .../HeatmapGrid/CMakeLists.txt | 5 ++ .../HeatEquationGrid/HeatmapGrid/main.cpp | 2 + .../HeatEquationGrid/HeatmapGrid/main.cu | 2 + .../HeatEquationGrid/HeatmapGrid/main.h | 78 +++++++++++++++++++ 6 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt index 6fa019081..4ef9404fc 100644 --- a/src/Benchmarks/CMakeLists.txt +++ b/src/Benchmarks/CMakeLists.txt @@ -1,5 +1,6 @@ -add_subdirectory( HeatEquation ) +#add_subdirectory( HeatEquation ) add_subdirectory( HeatEquationGrid ) +<<<<<<< HEAD add_subdirectory( BLAS ) add_subdirectory( NDArray ) add_subdirectory( SpMV ) @@ -8,3 +9,21 @@ add_subdirectory( LinearSolvers ) add_subdirectory( ODESolvers ) add_subdirectory( Sorting ) add_subdirectory( Traversers ) +======= +#add_subdirectory( BLAS ) +#add_subdirectory( NDArray ) +#add_subdirectory( SpMV ) +#add_subdirectory( DistSpMV ) +#add_subdirectory( LinearSolvers ) +#add_subdirectory( ODESolvers ) +#add_subdirectory( Sorting ) +#add_subdirectory( Traversers ) + +set( headers + Benchmarks.h + FunctionTimer.h + Logging.h +) + +install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks ) +>>>>>>> 049219f08 (Implement dimension set/get) diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt index 867619052..d7cbc2027 100644 --- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt @@ -1 +1,2 @@ -ADD_SUBDIRECTORY( HeatmapParallelFor ) +#ADD_SUBDIRECTORY( HeatmapParallelFor ) +ADD_SUBDIRECTORY( HeatmapGrid ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt new file mode 100644 index 000000000..a4c44b591 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt @@ -0,0 +1,5 @@ +#if (BUILD_CUDA) +# CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu ) +#ELSE() + add_executable( benchmark_heat_grid main.h main.cpp ) +#ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h new file mode 100644 index 000000000..6c910144c --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -0,0 +1,78 @@ + +#include +#include +#include +#include + +#include +#include + + +// Utils + +// Thanks for implementation idea https://www.fluentcpp.com/2021/04/30/how-to-implement-stdconjunction-and-stddisjunction-in-c11/ +// TODO: Implicit bool looks strange because, then I need to write ::value everywhere... + +template struct bool_pack {}; + +template +using conjunction = std::is_same, bool_pack>; + +// Any N dimensional grid: +// 1. Should provide local|remote step +// 2. Should know its own limits. Count of the elements at each dimension + +template +class Grid { + public: + Grid() {} + ~Grid() {} + + template ::value...>::value>, + typename = std::enable_if_t> + void setDimensions(Dimensions... dimensions) noexcept { + Index i = 0; + + for (auto x: { dimensions... }) { + this -> dimensions[i] = x; + i++; + } + } + + template ::value...>::value>> + std::vector getDimensions(DimensionIndex... indicies) const noexcept { + std::vector index{ indicies... }, result; + + std::transform(index.begin(), index.end(), std::back_inserter(result), [this](const Index& index) { + return this -> dimensions[index]; + }); + + return result; + } + + private: + Index dimensions[Dimension]; +}; + +int main(int argc, char *argv[]) { + Grid<4, int> grid; + + grid.setDimensions(1, 2, 3, 4); + + auto dimensions = grid.getDimensions(0, 1, 2); + + std::cout << dimensions[0] << dimensions[2] << dimensions[1] << std::endl; + + + // const int dimensions = 4; + + // int sizes[dimensions] = { 3, 3, 3, 3 }; + // int limits[dimensions] = { 0 }; + + + return 0; +} -- GitLab From 0cbf5fb41c8ec2db67c77275dd4bdfb9e3d7ccf6 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 4 Nov 2021 14:22:47 +0100 Subject: [PATCH 07/29] Implement n-dimensional grid maximal indexes --- .../HeatEquationGrid/HeatmapGrid/main.h | 87 +++++++++++++++---- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 6c910144c..5a1f736ef 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -10,9 +11,6 @@ // Utils -// Thanks for implementation idea https://www.fluentcpp.com/2021/04/30/how-to-implement-stdconjunction-and-stddisjunction-in-c11/ -// TODO: Implicit bool looks strange because, then I need to write ::value everywhere... - template struct bool_pack {}; template @@ -24,7 +22,8 @@ using conjunction = std::is_same, bool_pack> template + typename Device = TNL::Devices::Host, + typename = std::enable_if_t<(Dimension > 0)>> class Grid { public: Grid() {} @@ -40,8 +39,11 @@ class Grid { this -> dimensions[i] = x; i++; } + + refreshDimensionMaps(); } + // TODO: - Add __cuda_callable__ template ::value...>::value>> std::vector getDimensions(DimensionIndex... indicies) const noexcept { @@ -54,25 +56,76 @@ class Grid { return result; } - private: - Index dimensions[Dimension]; -}; - -int main(int argc, char *argv[]) { - Grid<4, int> grid; - - grid.setDimensions(1, 2, 3, 4); + // TODO: - Implement typechecking for function + template + void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept { - auto dimensions = grid.getDimensions(0, 1, 2); + } + private: + std::array dimensions; + /** + * @brief - A dimension map is a store for dimension limits over all combinations of basis. + * First, (n choose 0) elements will contain the count of 0 dimension elements + * Second, (n choose 1) elements will contain the count of 1-dimension elements + * .... + * + * For example, let's have a 3-d grid, then the map indexing will be the next: + * 0 - 0 - count of vertices + * 1, 2, 3 - count of edges in x, y, z plane + * 4, 5, 6 - count of faces in xy, xz, yz plane + * 7 - count of cells in xyz plane + * + * @warning - The ordering of is lexigraphical. + */ + std::array dimensionMap; + /** + * @brief - A cumulative map over dimensions. + */ + std::array cumulativeDimensionMap; + + /** + * @brief - Fills dimensions map for N-dimensional Grid. + * + * @complexity - O(2 ^ Dimension) + */ + void refreshDimensionMaps() noexcept { + std::array combinationBuffer = {}; + std::size_t j = 0; + + std::fill(cumulativeDimensionMap.begin(), cumulativeDimensionMap.end(), 0); + + for (std::size_t i = 0; i <= Dimension; i++) { + std::fill(combinationBuffer.begin(), combinationBuffer.end(), false); + std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true); + + do { + int result = 1; + + for (std::size_t i = 0; i < combinationBuffer.size(); i++) + result *= combinationBuffer[i] ? dimensions[i] : dimensions[i] + 1; + + dimensionMap[j] = result; + cumulativeDimensionMap[i] += result; + + j++; + } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end())); + } - std::cout << dimensions[0] << dimensions[2] << dimensions[1] << std::endl; + for (int i = 0; i < 1 << Dimension; i++) { + std::cout << dimensionMap[i] << " "; + } + std::cout << std::endl; + } +}; - // const int dimensions = 4; +int main(int argc, char *argv[]) { + Grid<3, int> grid; - // int sizes[dimensions] = { 3, 3, 3, 3 }; - // int limits[dimensions] = { 0 }; + grid.setDimensions(1, 2, 3); + // auto dimensions = grid.getDimensions(0, 1, 2); return 0; } -- GitLab From 96d7804df065bec9c1d206379a772b14bd19a8b0 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 4 Nov 2021 15:16:20 +0100 Subject: [PATCH 08/29] Move out separate heatmap solver implementation. --- .../HeatEquationGrid/Base/HeatmapSolver.h | 94 +++++++++++++++++++ .../HeatEquationGrid/CMakeLists.txt | 2 +- .../HeatEquationGrid/HeatmapGrid/main.h | 87 ++++++++++++----- .../HeatmapParallelFor/CMakeLists.txt | 4 +- .../HeatmapParallelFor/main.h | 89 +----------------- 5 files changed, 166 insertions(+), 110 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h new file mode 100644 index 000000000..92a2b4611 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h @@ -0,0 +1,94 @@ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +template +class HeatmapSolver +{ +public: + class Parameters + { + public: + const int xSize, ySize; + const Real xDomainSize, yDomainSize; + const Real sigma; + const Real timeStep, finalTime; + const bool verbose; + + Parameters(const TNL::Config::ParameterContainer ¶meters); + + static TNL::Config::ConfigDescription makeInputConfig(); + }; + + template + bool solve(const Parameters ¶meters) const; + +private: + template + bool writeGNUPlot(const std::string &filename, + const Parameters ¶meters, + const TNL::Containers::Array &map) const; +}; + +template +TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() +{ + TNL::Config::ConfigDescription config; + + config.addEntry("device", "Device the computation will run on.", "host"); + config.addEntryEnum("host"); + +#ifdef HAVE_CUDA + config.addEntryEnum("cuda"); +#endif + + config.addEntry("grid-x-size", "Grid size along x-axis.", 100); + config.addEntry("grid-y-size", "Grid size along y-axis.", 100); + + config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); + config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); + + config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); + + config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); + config.addEntry("final-time", "Final time of the simulation.", 0.012); + config.addEntry("verbose", "Verbose mode.", true); + + return config; +} + +template +HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer ¶meters) : xSize(parameters.getParameter("grid-x-size")), + ySize(parameters.getParameter("grid-y-size")), + xDomainSize(parameters.getParameter("domain-x-size")), + yDomainSize(parameters.getParameter("domain-y-size")), + sigma(parameters.getParameter("sigma")), + timeStep(parameters.getParameter("time-step")), + finalTime(parameters.getParameter("final-time")), + verbose(parameters.getParameter("verbose")) {} + +template +template +bool HeatmapSolver::writeGNUPlot(const std::string &filename, + const HeatmapSolver::Parameters ¶ms, + const TNL::Containers::Array &map) const +{ + std::ofstream out(filename, std::ios::out); + + if (!out.is_open()) + return false; + + for (int j = 0; j < params.ySize; j++) + for (int i = 0; i < params.xSize; i++) + out << i << " " << j << " " << map[j * params.xSize + i] << std::endl; + + return out.good(); +} diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt index d7cbc2027..9adc7520d 100644 --- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt @@ -1,2 +1,2 @@ -#ADD_SUBDIRECTORY( HeatmapParallelFor ) +ADD_SUBDIRECTORY( HeatmapParallelFor ) ADD_SUBDIRECTORY( HeatmapGrid ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 5a1f736ef..5f5e2cb4e 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -5,21 +5,20 @@ #include #include +#include "../Base/HeatmapSolver.h" + #include #include +#include +#include - -// Utils +#pragma once template struct bool_pack {}; template using conjunction = std::is_same, bool_pack>; -// Any N dimensional grid: -// 1. Should provide local|remote step -// 2. Should know its own limits. Count of the elements at each dimension - template::value...>::value>, typename = std::enable_if_t> @@ -42,24 +45,54 @@ class Grid { refreshDimensionMaps(); } - - // TODO: - Add __cuda_callable__ + /** + * @param[in] indicies - A dimension index pack + * + * - TODO: - Add __cuda_callable__ + */ template ::value...>::value>> std::vector getDimensions(DimensionIndex... indicies) const noexcept { - std::vector index{ indicies... }, result; + std::vector dimensionIndicies{ indicies... }, result; + + std::transform(dimensionIndicies.begin(), + dimensionIndicies.end(), + std::back_inserter(result), [this](const Index& index) { + TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension"); - std::transform(index.begin(), index.end(), std::back_inserter(result), [this](const Index& index) { return this -> dimensions[index]; }); return result; } + /** + * @brief - Returns the number of entities of specific dimension + */ + template ::value...>::value>> + std::vector getEntitiesCount(const Index dimension...) const noexcept + { + std::vector dimensionIndicies{indicies...}, result; + + std::transform(dimensionIndicies.begin(), + dimensionIndicies.end(), + std::back_inserter(result), [this](const Index &index) { + TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); + + return this->cumulativeDimensionMap[index]; + }); + + return result; + } + /** + * @brief - Traversers a specified dimension in parallel. + */ + template + void traverse(const Index dimension, Function function) const noexcept { + auto entitiesCount = getEntitiesCount(dimension)[0]; - // TODO: - Implement typechecking for function - template - void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept { } private: @@ -83,7 +116,6 @@ class Grid { * @brief - A cumulative map over dimensions. */ std::array cumulativeDimensionMap; - /** * @brief - Fills dimensions map for N-dimensional Grid. * @@ -111,13 +143,26 @@ class Grid { j++; } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end())); } + } +}; + +template +template +bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const { + Grid<2, int, Device> grid; + + grid.setDimensions(params.xSize, params.ySize); + + auto entitiesCount = grid.getEntitiesCount(2)[0]; + + TNL::Containers::Array ux(entitiesCount), // data at step u + aux(entitiesCount);// data at step u + 1 + + // Invalidate ux/aux + ux = 0; + aux = 0; - for (int i = 0; i < 1 << Dimension; i++) { - std::cout << dimensionMap[i] << " "; - } - std::cout << std::endl; - } }; int main(int argc, char *argv[]) { @@ -125,7 +170,7 @@ int main(int argc, char *argv[]) { grid.setDimensions(1, 2, 3); - // auto dimensions = grid.getDimensions(0, 1, 2); + return 0; } diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt index 054a28cf8..51ee69d15 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for main.h main.cu ) + CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cu ) ELSE() - add_executable( benchmark_heat_parallel_for main.h main.cpp ) + add_executable( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h index 311fc94e5..4e84e0d8d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -5,72 +5,9 @@ #include #include -#pragma once - -template -class HeatmapSolver -{ -public: - class Parameters - { - public: - const int xSize, ySize; - const Real xDomainSize, yDomainSize; - const Real sigma; - const Real timeStep, finalTime; - const bool verbose; - - Parameters(const TNL::Config::ParameterContainer ¶meters); - - static TNL::Config::ConfigDescription makeInputConfig(); - }; - - template - bool solve(const Parameters ¶meters) const; - -private: - template - bool writeGNUPlot(const std::string &filename, - const Parameters ¶meters, - const TNL::Containers::Array &map) const; -}; - -template -TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig() -{ - TNL::Config::ConfigDescription config; - - config.addEntry("device", "Device the computation will run on.", "host"); - config.addEntryEnum("host"); - -#ifdef HAVE_CUDA - config.addEntryEnum("cuda"); -#endif - - config.addEntry("grid-x-size", "Grid size along x-axis.", 100); - config.addEntry("grid-y-size", "Grid size along y-axis.", 100); +#include "../Base/HeatmapSolver.h" - config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); - config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); - - config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); - - config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); - config.addEntry("final-time", "Final time of the simulation.", 0.012); - config.addEntry("verbose", "Verbose mode.", true); - - return config; -} - -template -HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContainer ¶meters) : xSize(parameters.getParameter("grid-x-size")), - ySize(parameters.getParameter("grid-y-size")), - xDomainSize(parameters.getParameter("domain-x-size")), - yDomainSize(parameters.getParameter("domain-y-size")), - sigma(parameters.getParameter("sigma")), - timeStep(parameters.getParameter("time-step")), - finalTime(parameters.getParameter("final-time")), - verbose(parameters.getParameter("verbose")) {} +#pragma once /*** * Grid parameters: @@ -96,7 +33,6 @@ template template bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const { - // This is always an external storage for grid. TNL::Containers::Array ux(params.xSize * params.ySize), // data at step u aux(params.xSize * params.ySize);// data at step u + 1 @@ -154,26 +90,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c return writeGNUPlot("data_final.txt", params, ux); } -template -template -bool HeatmapSolver::writeGNUPlot(const std::string &filename, - const HeatmapSolver::Parameters ¶ms, - const TNL::Containers::Array &map) const -{ - std::ofstream out(filename, std::ios::out); - - if (!out.is_open()) - return false; - - for (int j = 0; j < params.ySize; j++) - for (int i = 0; i < params.xSize; i++) - out << i << " " << j << " " << map[j * params.xSize + i] << std::endl; - - return out.good(); -} - -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { using Real = double; auto config = HeatmapSolver::Parameters::makeInputConfig(); -- GitLab From ae1ed75418fccffad116d205d2eb9d510a4a88a2 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Tue, 9 Nov 2021 16:26:09 +0100 Subject: [PATCH 09/29] Implement basic traversal algorithm. Fix indexing in ParallelFor benchmark --- .../HeatEquationGrid/HeatmapGrid/main.h | 76 +++++++++++++++---- .../HeatmapParallelFor/main.h | 2 +- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 5f5e2cb4e..9debca43f 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -45,43 +45,57 @@ class Grid { refreshDimensionMaps(); } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getDimension(Index index) const noexcept { + TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension"); + + return dimensions[index]; + } /** * @param[in] indicies - A dimension index pack * * - TODO: - Add __cuda_callable__ */ template ::value...>::value>> + typename = std::enable_if_t::value...>::value>, + typename = std::enable_if_t 0>> std::vector getDimensions(DimensionIndex... indicies) const noexcept { std::vector dimensionIndicies{ indicies... }, result; std::transform(dimensionIndicies.begin(), dimensionIndicies.end(), std::back_inserter(result), [this](const Index& index) { - TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); - TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension"); - - return this -> dimensions[index]; + return this -> getDimension(index); }); return result; } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept { + TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); + + return cumulativeDimensionMap(index); + } /** * @brief - Returns the number of entities of specific dimension */ template ::value...>::value>> - std::vector getEntitiesCount(const Index dimension...) const noexcept + typename = std::enable_if_t::value...>::value>, + typename = std::enable_if_t 0>> + std::vector getEntitiesCounts(DimensionIndex... indicies) const noexcept { std::vector dimensionIndicies{indicies...}, result; std::transform(dimensionIndicies.begin(), dimensionIndicies.end(), std::back_inserter(result), [this](const Index &index) { - TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); - TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); - - return this->cumulativeDimensionMap[index]; + return getEntitiesCount(index); }); return result; @@ -89,11 +103,33 @@ class Grid { /** * @brief - Traversers a specified dimension in parallel. */ - template - void traverse(const Index dimension, Function function) const noexcept { + template + void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept { auto entitiesCount = getEntitiesCount(dimension)[0]; + auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index }; - + traverse(0, entitiesCount, identity, function, args...); + } + /** + * TODO: - A possibility of improvement, as it should be possible to specify more precise functions + * to remove user knowledge of functionality + */ + template + void traverse(const Index start, + const Index end, + IndexTransform transform, + Function function, + FunctionArgs... args) const noexcept { + TNL_ASSERT_LT(startIndex, endIndex, "Start index must be less than endIndex") + + auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable + { + auto transformedIndex = transform(index); + + function(transformedIndex, args...); + }; + + TNL::Algorithms::ParallelFor::exec(startIndex, endIndex, lambda, args...); } private: std::array dimensions; @@ -153,7 +189,15 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c grid.setDimensions(params.xSize, params.ySize); - auto entitiesCount = grid.getEntitiesCount(2)[0]; + const Real hx = params.xDomainSize / (Real)grid.getDimension(0); + const Real hy = params.yDomainSize / (Real)grid.getDimension(1); + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); + + auto entitiesCount = grid.getEntitiesCount(0); + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + + auto uxView = ux.getView(), auxView = aux.getView(); TNL::Containers::Array ux(entitiesCount), // data at step u aux(entitiesCount);// data at step u + 1 @@ -163,6 +207,8 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c aux = 0; + + return false; }; int main(int argc, char *argv[]) { diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h index 4e84e0d8d..08658ea14 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h @@ -66,7 +66,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto next = [=] __cuda_callable__(int i, int j) mutable { - auto index = j * params.ySize + i; + auto index = j * params.xSize + i; auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv; -- GitLab From 88a8c55137ef9ca42ac23c15cabb0c04d65f321b Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Tue, 9 Nov 2021 18:17:02 +0100 Subject: [PATCH 10/29] Implement fixes to correctly represent dimension. Update code to use TNL Containers --- .../HeatEquationGrid/HeatmapGrid/main.h | 57 ++++++++++--------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 9debca43f..57a96a6d4 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #pragma once @@ -61,15 +62,12 @@ class Grid { */ template ::value...>::value>, - typename = std::enable_if_t 0>> - std::vector getDimensions(DimensionIndex... indicies) const noexcept { - std::vector dimensionIndicies{ indicies... }, result; + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + TNL::Containers::StaticArray getDimensions(DimensionIndex... indicies) const noexcept { + TNL::Containers::StaticArray result{indicies...}; - std::transform(dimensionIndicies.begin(), - dimensionIndicies.end(), - std::back_inserter(result), [this](const Index& index) { - return this -> getDimension(index); - }); + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this -> getDimension(result[i]); return result; } @@ -87,16 +85,13 @@ class Grid { */ template ::value...>::value>, - typename = std::enable_if_t 0>> - std::vector getEntitiesCounts(DimensionIndex... indicies) const noexcept + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + TNL::Containers::StaticArray getEntitiesCounts(DimensionIndex... indicies) const noexcept { - std::vector dimensionIndicies{indicies...}, result; + TNL::Containers::StaticArray result{indicies...}; - std::transform(dimensionIndicies.begin(), - dimensionIndicies.end(), - std::back_inserter(result), [this](const Index &index) { - return getEntitiesCount(index); - }); + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this -> getEntitiesCount(result[i]); return result; } @@ -106,7 +101,7 @@ class Grid { template void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept { auto entitiesCount = getEntitiesCount(dimension)[0]; - auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index }; + auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index; }; traverse(0, entitiesCount, identity, function, args...); } @@ -120,7 +115,7 @@ class Grid { IndexTransform transform, Function function, FunctionArgs... args) const noexcept { - TNL_ASSERT_LT(startIndex, endIndex, "Start index must be less than endIndex") + TNL_ASSERT_LT(start, end, "Start index must be less than endIndex") auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable { @@ -129,10 +124,10 @@ class Grid { function(transformedIndex, args...); }; - TNL::Algorithms::ParallelFor::exec(startIndex, endIndex, lambda, args...); + TNL::Algorithms::ParallelFor::exec(start, end, lambda, args...); } private: - std::array dimensions; + TNL::Containers::StaticArray dimensions; /** * @brief - A dimension map is a store for dimension limits over all combinations of basis. * First, (n choose 0) elements will contain the count of 0 dimension elements @@ -147,11 +142,11 @@ class Grid { * * @warning - The ordering of is lexigraphical. */ - std::array dimensionMap; + TNL::Containers::StaticArray<1 << Dimension, Index> dimensionMap; /** * @brief - A cumulative map over dimensions. */ - std::array cumulativeDimensionMap; + TNL::Containers::StaticArray cumulativeDimensionMap; /** * @brief - Fills dimensions map for N-dimensional Grid. * @@ -161,7 +156,8 @@ class Grid { std::array combinationBuffer = {}; std::size_t j = 0; - std::fill(cumulativeDimensionMap.begin(), cumulativeDimensionMap.end(), 0); + for (std::size_t i = 0; i < Dimension + 1; i++) + cumulativeDimensionMap[i] = 0; for (std::size_t i = 0; i <= Dimension; i++) { std::fill(combinationBuffer.begin(), combinationBuffer.end(), false); @@ -170,8 +166,15 @@ class Grid { do { int result = 1; - for (std::size_t i = 0; i < combinationBuffer.size(); i++) - result *= combinationBuffer[i] ? dimensions[i] : dimensions[i] + 1; + for (std::size_t i = 0; i < combinationBuffer.size(); i++) { + // Dimensions are stored in the least significant order. + // For example, the list of dimensions [1, 2, 3] is the size along [x, y, z] dimensions + // Combination generator follow lexical order, so the first element of the sequence is [0, 0, ..., 0, 1]. + // That's why we need to reverse index to obtain valid dimension. + auto reversedIndex = combinationBuffer.size() - 1 - i; + + result *= combinationBuffer[reversedIndex] ? dimensions[i] : dimensions[i] + 1; + } dimensionMap[j] = result; cumulativeDimensionMap[i] += result; @@ -197,11 +200,11 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto entitiesCount = grid.getEntitiesCount(0); auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); - auto uxView = ux.getView(), auxView = aux.getView(); - TNL::Containers::Array ux(entitiesCount), // data at step u aux(entitiesCount);// data at step u + 1 + auto uxView = ux.getView(), auxView = aux.getView(); + // Invalidate ux/aux ux = 0; aux = 0; -- GitLab From 40d87ac990f86c6636a2e4f8a795dc54d6b1941f Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 11 Nov 2021 11:50:03 +0100 Subject: [PATCH 11/29] Implement traversal methods --- .../HeatmapGrid/CMakeLists.txt | 8 +- .../HeatEquationGrid/HeatmapGrid/main.h | 140 +++++++++++++----- 2 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt index a4c44b591..6649d0fb2 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt @@ -1,5 +1,5 @@ -#if (BUILD_CUDA) -# CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu ) -#ELSE() +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu ) +ELSE() add_executable( benchmark_heat_grid main.h main.cpp ) -#ENDIF() +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 57a96a6d4..0f9d32fb0 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "../Base/HeatmapSolver.h" @@ -11,6 +12,7 @@ #include #include #include +#include #include #pragma once @@ -20,6 +22,29 @@ template struct bool_pack {}; template using conjunction = std::is_same, bool_pack>; +template 0)>, + typename = std::enable_if_t::value>> +using Container = TNL::Containers::StaticArray; + +template 0)>, + typename = std::enable_if_t::value>> +class GridEntity { + public: + GridEntity(const Container& origin, + const Container& dimensions, + const std::bitset& direction): origin(origin), + dimensions(dimensions), + direction(direction) {}; + private: + Container origin; + Container dimensions; + std::bitset direction; +}; + template::value...>::value>, @@ -63,7 +90,7 @@ class Grid { template ::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - TNL::Containers::StaticArray getDimensions(DimensionIndex... indicies) const noexcept { + Container getDimensions(DimensionIndex... indicies) const noexcept { TNL::Containers::StaticArray result{indicies...}; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) @@ -86,9 +113,8 @@ class Grid { template ::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - TNL::Containers::StaticArray getEntitiesCounts(DimensionIndex... indicies) const noexcept - { - TNL::Containers::StaticArray result{indicies...}; + Container getEntitiesCounts(DimensionIndex... indicies) const noexcept { + Container result{indicies...}; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) result[i] = this -> getEntitiesCount(result[i]); @@ -96,38 +122,81 @@ class Grid { return result; } /** - * @brief - Traversers a specified dimension in parallel. + * @brief - Traversers all elements in the grid */ template - void traverse(const Index dimension, Function function, FunctionArgs... args) const noexcept { - auto entitiesCount = getEntitiesCount(dimension)[0]; - auto identity = [] __cuda_callable__ (const Index&& index) mutable { return index; }; + void traverseAll(Function function, FunctionArgs... args) const noexcept { + auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable { + function(index, args...); + }; - traverse(0, entitiesCount, identity, function, args...); + TNL::Algorithms::ParallelFor::exec(0, cumulativeDimensionMap[0], lambda, args...); } /** - * TODO: - A possibility of improvement, as it should be possible to specify more precise functions - * to remove user knowledge of functionality + * @brief - Traverses a grid from start index to end index. + * + * @param[in] start - a start index of point + * @param[in] end - an end index of point + * @param[in] directions - A pack of boolean vector flags with the size of the dimension. + * For example, let's have the 3-dimensional grid. + * A pack {false, false, false} will call function for all points + * A pack {true, false, false} will call function for edges directed over dimension at index of true + * A pack {true, true, false} will call function for faces directed over dimension at index of true + * A pack {true, true, true} will call function for cells directed over + * */ - template - void traverse(const Index start, - const Index end, - IndexTransform transform, + template + void traverse(const Container& start, + const Container& end, + const TNL::Containers::Vector>& directions, Function function, FunctionArgs... args) const noexcept { - TNL_ASSERT_LT(start, end, "Start index must be less than endIndex") + // TODO: - This will overflow for higher dimensions + Index startCollapsedIndex = 0; + Index endCollapsedIndex = 0; - auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable - { - auto transformedIndex = transform(index); + // TODO: - For higher dimensions, this will overflow. + Container multipliers = 1; - function(transformedIndex, args...); + for (Index i = 0; i < Dimension; i++) { + for (Index j = i + 1; j < Dimension; j++) + multipliers[i] *= dimensions[j]; + + startCollapsedIndex += start[i] * multipliers[i]; + endCollapsedIndex += end[i] * multipliers[i]; + } + + // TODO: - Improve message formatting + TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start.. cumulativeDimensionMap[0], "End must be less, than amount of points in grid"); + + auto dimensions = this -> dimensions; + + auto outerFunction = [=] __cuda_callable__ (const Index i, FunctionArgs... args) mutable { + Index dim = Dimension - 1; + + while (i != 0) { + Index newIndex = start[dim] + i, dimension = dimensions[dim]; + Index quotient = newIndex / dimension; + Index reminder = newIndex - (dimension * quotient); + + start[dim] = reminder; + i = quotient; + + dim -= 1; + } + + for (const auto& direction: directions) { + GridEntity entity = { start, dimensions, direction }; + + function(entity, args...); + } }; - TNL::Algorithms::ParallelFor::exec(start, end, lambda, args...); + TNL::Algorithms::ParallelFor::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...); } private: - TNL::Containers::StaticArray dimensions; + Container dimensions; /** * @brief - A dimension map is a store for dimension limits over all combinations of basis. * First, (n choose 0) elements will contain the count of 0 dimension elements @@ -136,17 +205,17 @@ class Grid { * * For example, let's have a 3-d grid, then the map indexing will be the next: * 0 - 0 - count of vertices - * 1, 2, 3 - count of edges in x, y, z plane - * 4, 5, 6 - count of faces in xy, xz, yz plane - * 7 - count of cells in xyz plane + * 1, 2, 3 - count of edges in z, y, x plane + * 4, 5, 6 - count of faces in yz, xz, xy plane + * 7 - count of cells in z y x plane * * @warning - The ordering of is lexigraphical. */ - TNL::Containers::StaticArray<1 << Dimension, Index> dimensionMap; + Container<1 << Dimension, Index> dimensionMap; /** * @brief - A cumulative map over dimensions. */ - TNL::Containers::StaticArray cumulativeDimensionMap; + Container cumulativeDimensionMap; /** * @brief - Fills dimensions map for N-dimensional Grid. * @@ -166,15 +235,8 @@ class Grid { do { int result = 1; - for (std::size_t i = 0; i < combinationBuffer.size(); i++) { - // Dimensions are stored in the least significant order. - // For example, the list of dimensions [1, 2, 3] is the size along [x, y, z] dimensions - // Combination generator follow lexical order, so the first element of the sequence is [0, 0, ..., 0, 1]. - // That's why we need to reverse index to obtain valid dimension. - auto reversedIndex = combinationBuffer.size() - 1 - i; - - result *= combinationBuffer[reversedIndex] ? dimensions[i] : dimensions[i] + 1; - } + for (std::size_t k = 0; k < combinationBuffer.size(); k++) + result *= combinationBuffer[k] ? dimensions[k] : dimensions[k] + 1; dimensionMap[j] = result; cumulativeDimensionMap[i] += result; @@ -217,9 +279,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c int main(int argc, char *argv[]) { Grid<3, int> grid; - grid.setDimensions(1, 2, 3); - - + grid.setDimensions(3, 2, 1); return 0; } -- GitLab From 8e005e6eadbb51ba8e8c0e0de4c7ba5af30973e8 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 11 Nov 2021 15:20:24 +0100 Subject: [PATCH 12/29] Work on fixes after testing the implementation of traversion. Revert building of all benchmarks --- src/Benchmarks/CMakeLists.txt | 21 +-- .../HeatEquationGrid/HeatmapGrid/main.h | 124 +++++++++++++----- 2 files changed, 95 insertions(+), 50 deletions(-) diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt index 4ef9404fc..6fa019081 100644 --- a/src/Benchmarks/CMakeLists.txt +++ b/src/Benchmarks/CMakeLists.txt @@ -1,6 +1,5 @@ -#add_subdirectory( HeatEquation ) +add_subdirectory( HeatEquation ) add_subdirectory( HeatEquationGrid ) -<<<<<<< HEAD add_subdirectory( BLAS ) add_subdirectory( NDArray ) add_subdirectory( SpMV ) @@ -9,21 +8,3 @@ add_subdirectory( LinearSolvers ) add_subdirectory( ODESolvers ) add_subdirectory( Sorting ) add_subdirectory( Traversers ) -======= -#add_subdirectory( BLAS ) -#add_subdirectory( NDArray ) -#add_subdirectory( SpMV ) -#add_subdirectory( DistSpMV ) -#add_subdirectory( LinearSolvers ) -#add_subdirectory( ODESolvers ) -#add_subdirectory( Sorting ) -#add_subdirectory( Traversers ) - -set( headers - Benchmarks.h - FunctionTimer.h - Logging.h -) - -install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks ) ->>>>>>> 049219f08 (Implement dimension set/get) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 0f9d32fb0..89b8253f6 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #pragma once @@ -34,15 +33,59 @@ template::value>> class GridEntity { public: - GridEntity(const Container& origin, - const Container& dimensions, - const std::bitset& direction): origin(origin), - dimensions(dimensions), - direction(direction) {}; + __cuda_callable__ inline + GridEntity(const Container& startPosition, + const Index& startOffset, + const Index& offset, + const Container& dimensions): + // const std::bitset direction) : + startPosition(startPosition), + startOffset(startOffset), + position({}), + offset(offset), + dimensions(dimensions) {}//, + //direction(direction) {}; + __cuda_callable__ + ~GridEntity() {}; + + __cuda_callable__ inline + Container getPosition() noexcept { + if (position.getSize() == 0) { + auto position = startPosition; + Index tmpOffset = offset; + + Index dim = Dimension - 1; + + while (offset != 0) { + Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim]; + Index quotient = newIndex / dimension; + Index reminder = newIndex - (dimension * quotient); + + position[dim] = reminder; + tmpOffset = quotient; + + dim -= 1; + } + + this -> position = position; + } + + return position; + } + + __cuda_callable__ inline + Index getIndex() const noexcept { + return startOffset + offset; + } private: - Container origin; - Container dimensions; - std::bitset direction; + const Container startPosition; + const Index startOffset; + + Container position; + const Index offset; + + const Container dimensions; + // const std::bitset direction; }; template::value...>::value>, @@ -148,7 +189,7 @@ class Grid { template void traverse(const Container& start, const Container& end, - const TNL::Containers::Vector>& directions, + //const TNL::Containers::Array, Device>& directions, Function function, FunctionArgs... args) const noexcept { // TODO: - This will overflow for higher dimensions @@ -167,30 +208,22 @@ class Grid { } // TODO: - Improve message formatting - TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start.. cumulativeDimensionMap[0], "End must be less, than amount of points in grid"); auto dimensions = this -> dimensions; + // auto directionsView = directions.getConstView(); - auto outerFunction = [=] __cuda_callable__ (const Index i, FunctionArgs... args) mutable { - Index dim = Dimension - 1; + auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable { + GridEntity entity{ start, startCollapsedIndex, offset, dimensions }; - while (i != 0) { - Index newIndex = start[dim] + i, dimension = dimensions[dim]; - Index quotient = newIndex / dimension; - Index reminder = newIndex - (dimension * quotient); + function(entity, args...); - start[dim] = reminder; - i = quotient; + // for (Index j = 0; j < directionsView.getSize(); j++) { + // GridEntity entity{ start, startCollapsedIndex, j, dimensions, directionsView[i] }; - dim -= 1; - } - for (const auto& direction: directions) { - GridEntity entity = { start, dimensions, direction }; - - function(entity, args...); - } + //} }; TNL::Algorithms::ParallelFor::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...); @@ -271,15 +304,46 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c ux = 0; aux = 0; + /* auto init = [=] __cuda_callable__(const auto& entity) mutable { + // auto index = j * params.xSize + i; + //auto x = i * hx - params.xDomainSize / 2; + //auto y = j * hy - params.yDomainSize / 2; + + //uxView[index] = exp(params.sigma * (x * x + y * y)); + };*/ + + /* grid.traverse({ 1, 1 }, + { grid.getDimension(0) - 1, grid.getDimension(1) - 1 }, + { { 0b00 } }, + init);*/ return false; }; int main(int argc, char *argv[]) { - Grid<3, int> grid; + Grid<3, int, TNL::Devices::Cuda> grid; + + grid.setDimensions(3, 3, 3); + + auto fn = [=] __cuda_callable__ (int index) { + //printf("%d\n", index); + }; + + grid.traverseAll(fn); + + auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) { + // entity.doSomething(); + //printf("%d\n", entity.getPosition()); + //printf("%d \n", entity.getIndex()); + }; + + //TNL::Containers::Array> directions; + + //directions.resize(1); + //directions[0] = { 0b000 }; - grid.setDimensions(3, 2, 1); + grid.traverse({ 0, 0, 0 }, {3, 3, 3}, fn_entity); return 0; } -- GitLab From 8801e9738c87f98e57935fbfe0c16cfa53bf8bba Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 11 Nov 2021 16:35:24 +0100 Subject: [PATCH 13/29] Fix traversal by coordinates. Change idea of grid dimensions size. Now it is set as the amount of points --- .../HeatEquationGrid/HeatmapGrid/main.h | 54 +++++++++---------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 89b8253f6..a2974a12f 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -33,30 +33,32 @@ template::value>> class GridEntity { public: - __cuda_callable__ inline + __cuda_callable__ inline explicit GridEntity(const Container& startPosition, const Index& startOffset, const Index& offset, - const Container& dimensions): - // const std::bitset direction) : + const Container& dimensions, + const Container direction) : startPosition(startPosition), startOffset(startOffset), - position({}), offset(offset), - dimensions(dimensions) {}//, - //direction(direction) {}; + dimensions(dimensions), + direction(direction) { + this -> position = -1; + }; + __cuda_callable__ ~GridEntity() {}; __cuda_callable__ inline Container getPosition() noexcept { - if (position.getSize() == 0) { + if (position[0] == -1) { auto position = startPosition; Index tmpOffset = offset; Index dim = Dimension - 1; - while (offset != 0) { + while (tmpOffset) { Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim]; Index quotient = newIndex / dimension; Index reminder = newIndex - (dimension * quotient); @@ -85,7 +87,7 @@ class GridEntity { const Index offset; const Container dimensions; - // const std::bitset direction; + const Container direction; }; template dimensions[i] = x; i++; } @@ -189,7 +192,7 @@ class Grid { template void traverse(const Container& start, const Container& end, - //const TNL::Containers::Array, Device>& directions, + const TNL::Containers::Array, Device>& directions, Function function, FunctionArgs... args) const noexcept { // TODO: - This will overflow for higher dimensions @@ -212,18 +215,14 @@ class Grid { TNL_ASSERT_LE(endCollapsedIndex, this -> cumulativeDimensionMap[0], "End must be less, than amount of points in grid"); auto dimensions = this -> dimensions; - // auto directionsView = directions.getConstView(); + auto directionsView = directions.getConstView(); auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable { - GridEntity entity{ start, startCollapsedIndex, offset, dimensions }; - - function(entity, args...); - - // for (Index j = 0; j < directionsView.getSize(); j++) { - // GridEntity entity{ start, startCollapsedIndex, j, dimensions, directionsView[i] }; + for (Index j = 0; j < directionsView.getSize(); j++) { + GridEntity entity{ start, startCollapsedIndex, offset, dimensions, directionsView[j] }; - - //} + function(entity, args...); + } }; TNL::Algorithms::ParallelFor::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...); @@ -269,7 +268,7 @@ class Grid { int result = 1; for (std::size_t k = 0; k < combinationBuffer.size(); k++) - result *= combinationBuffer[k] ? dimensions[k] : dimensions[k] + 1; + result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; dimensionMap[j] = result; cumulativeDimensionMap[i] += result; @@ -322,7 +321,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c }; int main(int argc, char *argv[]) { - Grid<3, int, TNL::Devices::Cuda> grid; + Grid<3, int, TNL::Devices::Host> grid; grid.setDimensions(3, 3, 3); @@ -333,17 +332,12 @@ int main(int argc, char *argv[]) { grid.traverseAll(fn); auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) { - // entity.doSomething(); - //printf("%d\n", entity.getPosition()); - //printf("%d \n", entity.getIndex()); + printf("%d \n", entity.getIndex()); }; - //TNL::Containers::Array> directions; - - //directions.resize(1); - //directions[0] = { 0b000 }; + Container<3, int> direction { 0, 0, 0}; - grid.traverse({ 0, 0, 0 }, {3, 3, 3}, fn_entity); + grid.traverse({ 1, 1, 1 }, { 2, 2, 2 }, { direction }, fn_entity); return 0; } -- GitLab From 394f4f6e56554562966d9c0149555e7640ac4bc6 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Wed, 17 Nov 2021 14:19:37 +0100 Subject: [PATCH 14/29] Finalise n-dimensional grid --- .../HeatEquationGrid/HeatmapGrid/main.h | 224 ++++++++++++------ 1 file changed, 157 insertions(+), 67 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index a2974a12f..f9fe564a5 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -33,33 +33,45 @@ template::value>> class GridEntity { public: - __cuda_callable__ inline explicit - GridEntity(const Container& startPosition, - const Index& startOffset, - const Index& offset, - const Container& dimensions, - const Container direction) : - startPosition(startPosition), - startOffset(startOffset), - offset(offset), - dimensions(dimensions), - direction(direction) { + struct Context { + public: + const int startOffset; + + const Container startPosition; + const Container traverseRectDimensions; + const Container direction; + + Context(const int startOffset, + const Container startPosition, + const Container traverseRectDimensions, + const Container direction): + startOffset(startOffset), + startPosition(startPosition), + traverseRectDimensions(traverseRectDimensions), + direction(direction) {} + }; + + __cuda_callable__ + inline explicit GridEntity(const Context& context, + const Index& vertexIndex) : + vertexIndex(vertexIndex), + context(context) { this -> position = -1; }; __cuda_callable__ ~GridEntity() {}; - __cuda_callable__ inline - Container getPosition() noexcept { + __cuda_callable__ + inline Container getPosition() noexcept { if (position[0] == -1) { - auto position = startPosition; - Index tmpOffset = offset; + Container position = 0; + Index tmpOffset = vertexIndex - context.startOffset; Index dim = Dimension - 1; while (tmpOffset) { - Index newIndex = position[dim] + tmpOffset, dimension = dimensions[dim]; + Index newIndex = position[dim] + tmpOffset, dimension = context.traverseRectDimensions[dim]; Index quotient = newIndex / dimension; Index reminder = newIndex - (dimension * quotient); @@ -69,25 +81,22 @@ class GridEntity { dim -= 1; } - this -> position = position; + for (Index i = 0; i < Dimension; i++) + this -> position[i] = position[i] + context.startPosition[i]; } return position; } - __cuda_callable__ inline - Index getIndex() const noexcept { - return startOffset + offset; + __cuda_callable__ + inline Index getIndex() const noexcept { + return vertexIndex; } private: - const Container startPosition; - const Index startOffset; - Container position; - const Index offset; - const Container dimensions; - const Container direction; + const Index vertexIndex; + const Context& context; }; template::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> Container getDimensions(DimensionIndex... indicies) const noexcept { - TNL::Containers::StaticArray result{indicies...}; + Container result{indicies...}; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) result[i] = this -> getDimension(result[i]); @@ -146,7 +155,7 @@ class Grid { * @param[in] index - index of dimension */ inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept { - TNL_ASSERT_GT(index, 0, "Index must be greater than zero"); + TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); return cumulativeDimensionMap(index); @@ -165,6 +174,29 @@ class Grid { return result; } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getEndIndex(Index index) const noexcept { + TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension"); + + return this -> getDimension(index) - 1; + } + /** + * @brief - Returns the last index of specific dimensions + */ + template ::value...>::value>, + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + Container getEndIndicies(DimensionIndex... indicies) const noexcept { + Container result{ indicies... }; + + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this->getEndIndex(result[i]); + + return result; + } /** * @brief - Traversers all elements in the grid */ @@ -190,42 +222,41 @@ class Grid { * */ template - void traverse(const Container& start, - const Container& end, + void traverse(const Container& firstVertex, + const Container& secondVertex, const TNL::Containers::Array, Device>& directions, Function function, FunctionArgs... args) const noexcept { - // TODO: - This will overflow for higher dimensions - Index startCollapsedIndex = 0; - Index endCollapsedIndex = 0; - - // TODO: - For higher dimensions, this will overflow. - Container multipliers = 1; + Index verticesCount = 1, offset = 0; + Container traverseRectDimensions, offsetPosition, multipliers; for (Index i = 0; i < Dimension; i++) { - for (Index j = i + 1; j < Dimension; j++) - multipliers[i] *= dimensions[j]; + traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1; + verticesCount *= traverseRectDimensions[i]; + offsetPosition[i] = std::min(firstVertex[i], secondVertex[i]); - startCollapsedIndex += start[i] * multipliers[i]; - endCollapsedIndex += end[i] * multipliers[i]; - } + multipliers[i] = i == 0 ? 1 : multipliers[i - 1] * dimensions[i]; + offset += offsetPosition[i] * multipliers[i]; - // TODO: - Improve message formatting - TNL_ASSERT_LT(startCollapsedIndex, endCollapsedIndex, "Traverse range must be in [start.. cumulativeDimensionMap[0], "End must be less, than amount of points in grid"); + TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range"); + TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range"); + } - auto dimensions = this -> dimensions; - auto directionsView = directions.getConstView(); + using Context = typename GridEntity::Context; - auto outerFunction = [=] __cuda_callable__ (Index offset, FunctionArgs... args) mutable { - for (Index j = 0; j < directionsView.getSize(); j++) { - GridEntity entity{ start, startCollapsedIndex, offset, dimensions, directionsView[j] }; + auto outerFunction = [function] __cuda_callable__(Index offset, const Context& context, FunctionArgs... args) mutable { + GridEntity entity{ context, context.startOffset + offset }; - function(entity, args...); - } + function(entity, args...); }; - TNL::Algorithms::ParallelFor::exec(0, endCollapsedIndex - startCollapsedIndex + 1, outerFunction, args...); + Index lowerBound = 0, upperBound = verticesCount; + + for (Index i = 0; i < directions.getSize(); i++) { + const Context context{ offset, offsetPosition, traverseRectDimensions, directions[i] }; + + TNL::Algorithms::ParallelFor::exec(lowerBound, upperBound, outerFunction, context, args...); + } } private: Container dimensions; @@ -303,23 +334,81 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c ux = 0; aux = 0; - /* auto init = [=] __cuda_callable__(const auto& entity) mutable { - // auto index = j * params.xSize + i; + auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { + auto position = entity.getPosition(); + auto index = entity.getIndex(); + + auto x = position[0] * hx - params.xDomainSize / 2; + auto y = position[1] * hx - params.yDomainSize / 2; + + uxView[index] = exp(params.sigma * (x * x + y * y)); + }; + + const Container<2, bool> direction{ false, false }; + + grid.traverse({ 1, 1 }, + { grid.getEndIndex(0), grid.getEndIndex(1) }, + { direction }, + init); + + if (!writeGNUPlot("data.txt", params, ux)) + return false; + + auto xDimension = grid.getDimension(0); + + auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { + auto index = entity.getIndex(); + + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv; + }; + + auto update = [=] __cuda_callable__(int i) mutable { + uxView[i] += auxView[i] * timestep; + }; + + Real start = 0; - //auto x = i * hx - params.xDomainSize / 2; - //auto y = j * hy - params.yDomainSize / 2; + while (start < params.finalTime) { + grid.traverse({ 1, 1 }, + { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, + { direction }, + next); - //uxView[index] = exp(params.sigma * (x * x + y * y)); - };*/ + grid.traverseAll(update); - /* grid.traverse({ 1, 1 }, - { grid.getDimension(0) - 1, grid.getDimension(1) - 1 }, - { { 0b00 } }, - init);*/ + start += timestep; + } - return false; + return writeGNUPlot("data_final.txt", params, ux); }; +int main(int argc, char* argv[]) { + using Real = double; + + auto config = HeatmapSolver::Parameters::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); + + HeatmapSolver solver; + + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; + +#ifdef HAVE_CUDA + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; +#endif + + return EXIT_SUCCESS; +} + +/* int main(int argc, char *argv[]) { Grid<3, int, TNL::Devices::Host> grid; @@ -332,12 +421,13 @@ int main(int argc, char *argv[]) { grid.traverseAll(fn); auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) { - printf("%d \n", entity.getIndex()); + printf("%d %d %d \n", entity.getPosition()[0], entity.getPosition()[1], entity.getPosition()[2]); }; - Container<3, int> direction { 0, 0, 0}; + Container<3, int> direction { 0, 0, 0 }; - grid.traverse({ 1, 1, 1 }, { 2, 2, 2 }, { direction }, fn_entity); + grid.traverse({ 1, 1, 1 }, { 1, 1, 1 }, { direction }, fn_entity); return 0; } +*/ -- GitLab From 6a3159aa498a421318889e44ced4a101794a19a4 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Wed, 17 Nov 2021 14:45:13 +0100 Subject: [PATCH 15/29] Fix boundaries of getEndIndex --- src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index f9fe564a5..6506e8181 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -347,7 +347,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c const Container<2, bool> direction{ false, false }; grid.traverse({ 1, 1 }, - { grid.getEndIndex(0), grid.getEndIndex(1) }, + { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, { direction }, init); -- GitLab From 84aeba4a2f13ca795cfca46443c7dd3bdd7c7186 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Wed, 17 Nov 2021 14:47:05 +0100 Subject: [PATCH 16/29] Fix typo: indicies -> indices --- src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h index 6506e8181..40d7bef31 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h @@ -138,13 +138,13 @@ class Grid { return dimensions[index]; } /** - * @param[in] indicies - A dimension index pack + * @param[in] indices - A dimension index pack */ template ::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getDimensions(DimensionIndex... indicies) const noexcept { - Container result{indicies...}; + Container getDimensions(DimensionIndex... indices) const noexcept { + Container result{indices...}; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) result[i] = this -> getDimension(result[i]); @@ -166,8 +166,8 @@ class Grid { template ::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getEntitiesCounts(DimensionIndex... indicies) const noexcept { - Container result{indicies...}; + Container getEntitiesCounts(DimensionIndex... indices) const noexcept { + Container result{indices...}; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) result[i] = this -> getEntitiesCount(result[i]); @@ -189,8 +189,8 @@ class Grid { template ::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getEndIndicies(DimensionIndex... indicies) const noexcept { - Container result{ indicies... }; + Container getEndindices(DimensionIndex... indices) const noexcept { + Container result{ indices... }; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) result[i] = this->getEndIndex(result[i]); -- GitLab From d5f6b2fa42a43afd3a228b04142672cf00161845 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Wed, 17 Nov 2021 17:09:06 +0100 Subject: [PATCH 17/29] Add traversers to Grid1D. --- .../HeatEquationGrid/CMakeLists.txt | 3 +- .../HeatmapGrid/CMakeLists.txt | 5 - .../HeatmapNDimGrid/CMakeLists.txt | 5 + .../{HeatmapGrid => HeatmapNDimGrid}/main.cpp | 0 .../{HeatmapGrid => HeatmapNDimGrid}/main.cu | 0 .../{HeatmapGrid => HeatmapNDimGrid}/main.h | 0 .../HeatmapTNLGrid/CMakeLists.txt | 5 + .../HeatEquationGrid/HeatmapTNLGrid/main.cpp | 2 + .../HeatEquationGrid/HeatmapTNLGrid/main.cu | 2 + .../HeatEquationGrid/HeatmapTNLGrid/main.h | 32 +++++++ src/TNL/Meshes/GridDetails/Grid1D.h | 20 +++- src/TNL/Meshes/GridDetails/Grid1D_impl.h | 94 +++++++++++++++++++ 12 files changed, 161 insertions(+), 7 deletions(-) delete mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.cpp (100%) rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.cu (100%) rename src/Benchmarks/HeatEquationGrid/{HeatmapGrid => HeatmapNDimGrid}/main.h (100%) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h diff --git a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt index 9adc7520d..de5592b60 100644 --- a/src/Benchmarks/HeatEquationGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/CMakeLists.txt @@ -1,2 +1,3 @@ ADD_SUBDIRECTORY( HeatmapParallelFor ) -ADD_SUBDIRECTORY( HeatmapGrid ) +ADD_SUBDIRECTORY( HeatmapNDimGrid ) +ADD_SUBDIRECTORY( HeatmapTNLGrid ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt deleted file mode 100644 index 6649d0fb2..000000000 --- a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_grid main.h main.cu ) -ELSE() - add_executable( benchmark_heat_grid main.h main.cpp ) -ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt new file mode 100644 index 000000000..a17375117 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cu ) +ELSE() + add_executable( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cpp rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.cu rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapGrid/main.h rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt new file mode 100644 index 000000000..5672634e3 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( benchmark_heat_tnl_grid main.h main.cu ) +ELSE() + add_executable( benchmark_heat_tnl_grid main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h new file mode 100644 index 000000000..bb54c4982 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h @@ -0,0 +1,32 @@ + +#pragma once + +#include "TNL/Meshes/GridDetails/Grid1D.h" + +using Grid = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>; + +int main(int argc, char *argv[]) { + Grid grid; + + grid.setDimensions({ 100 }); + + auto fn0 = [=] __cuda_callable__(const Grid::EntityType<0>&entity, int variant) mutable { + printf("%d %d\n", variant, entity.getCoordinates().x()); + }; + + auto fn1 = [=] __cuda_callable__(const Grid::EntityType<1>& entity, int variant) mutable { + printf("%d %d\n", variant, entity.getCoordinates().x()); + }; + + grid.forAll<0>(fn0, 0);//, "All 0 dim:"); + grid.forInterior<0>(fn0, 1);//, "Interior 0 dim:"); + grid.forBoundary<0>(fn0, 2);//, "Boundary 0 dim:"); + + + grid.forAll<1>(fn1, 3);//, "All 1 dim:"); + grid.forInterior<1>(fn1, 4);//, "Interior 1 dim:"); + grid.forBoundary<1>(fn1, 5);//, "Boundary 1 dim:"); + + return 0; +} + diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h index 64f783a17..e7e31326b 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D.h +++ b/src/TNL/Meshes/GridDetails/Grid1D.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace TNL { namespace Meshes { @@ -176,11 +177,28 @@ public: const RealType& getSpaceStepsProducts() const; /** - * \breif Returns the measure (length) of a cell in this grid. + * \brief Returns the measure (length) of a cell in this grid. */ __cuda_callable__ inline const RealType& getCellMeasure() const; + /** + * \brief Traverses all elements + */ + template + void forAll(Func func, FuncArgs... args) const; + + /** + * \brief Traversers boundary elements + */ + template + void forBoundary(Func func, FuncArgs... args) const; + + /** + * \brief Traversers interior elements + */ + template + void forInterior(Func func, FuncArgs... args) const; /** * \brief Returns the smallest length of step out of all coordinates (axes). */ diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h index 0ad33b103..8d73408fa 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h @@ -336,6 +336,100 @@ getCellMeasure() const return this->template getSpaceStepsProducts< 1 >(); } +template< typename Real, + typename Device, + typename Index > +template +void Grid<1, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1"); + + auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates(i); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + TNL::Algorithms::ParallelFor::exec(0, dimensions.x() + 1, outer, *this, args...); + break; + case 1: + TNL::Algorithms::ParallelFor::exec(localBegin.x(), localEnd.x(), outer, *this, args...); + break; + default: break; + } +} + +template< typename Real, + typename Device, + typename Index > +template +void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1"); + + auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates(i); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + outer(0, *this, args...); + outer(getDimensions().x(), *this, args...); + break; + case 1: + if (localBegin < interiorBegin && interiorEnd < localEnd) { + outer(interiorBegin.x() - 1, *this, args...); + outer(interiorEnd.x(), *this, args...); + break; + } + + if (localBegin < interiorBegin) { + outer(interiorBegin.x() - 1, *this, args...); + break; + } + + if (interiorEnd < localEnd) + outer(interiorEnd.x(), *this, args...); + break; + default: break; + } +} + +template< typename Real, + typename Device, + typename Index > +template +void Grid<1, Real, Device, Index>::forInterior(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 1, "Entity dimension must be either 0 or 1"); + + auto outer = [=] __cuda_callable__(Index i, const Grid<1, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates(i); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + TNL::Algorithms::ParallelFor::exec(1, dimensions.x(), outer, *this, args...); + break; + case 1: + TNL::Algorithms::ParallelFor::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...); + break; + default: break; + } +} + template< typename Real, typename Device, typename Index > -- GitLab From 0dbc6599a28b40b719cf2807da18c3a888f868a5 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 18 Nov 2021 14:35:11 +0100 Subject: [PATCH 18/29] Implement 2D grid traversal methods --- .../HeatEquationGrid/HeatmapTNLGrid/main.h | 57 ++++- src/TNL/Meshes/GridDetails/Grid2D.h | 18 ++ src/TNL/Meshes/GridDetails/Grid2D_impl.h | 217 ++++++++++++++++++ 3 files changed, 280 insertions(+), 12 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h index bb54c4982..45154f05c 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h @@ -3,30 +3,63 @@ #include "TNL/Meshes/GridDetails/Grid1D.h" -using Grid = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>; +using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>; +using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>; -int main(int argc, char *argv[]) { - Grid grid; +void check1D() { + Grid1D grid; grid.setDimensions({ 100 }); - auto fn0 = [=] __cuda_callable__(const Grid::EntityType<0>&entity, int variant) mutable { + auto fn0 = [=] __cuda_callable__(const Grid1D::EntityType<0>&entity, int variant) mutable { printf("%d %d\n", variant, entity.getCoordinates().x()); - }; + }; - auto fn1 = [=] __cuda_callable__(const Grid::EntityType<1>& entity, int variant) mutable { + auto fn1 = [=] __cuda_callable__(const Grid1D::EntityType<1>&entity, int variant) mutable { printf("%d %d\n", variant, entity.getCoordinates().x()); }; - grid.forAll<0>(fn0, 0);//, "All 0 dim:"); - grid.forInterior<0>(fn0, 1);//, "Interior 0 dim:"); - grid.forBoundary<0>(fn0, 2);//, "Boundary 0 dim:"); + grid.forAll<0>(fn0, 0); + grid.forInterior<0>(fn0, 1); + grid.forBoundary<0>(fn0, 2); + + grid.forAll<1>(fn1, 3); + grid.forInterior<1>(fn1, 4); + grid.forBoundary<1>(fn1, 5); +} + +void check2D() { + Grid2D grid; + + grid.setDimensions({ 10, 10 }); + auto fn0 = [=] __cuda_callable__(const Grid2D::EntityType<0>&entity, int variant) mutable { + printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); + }; - grid.forAll<1>(fn1, 3);//, "All 1 dim:"); - grid.forInterior<1>(fn1, 4);//, "Interior 1 dim:"); - grid.forBoundary<1>(fn1, 5);//, "Boundary 1 dim:"); + auto fn1 = [=] __cuda_callable__(const Grid2D::EntityType<1>&entity, int variant) mutable { + printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); + }; + auto fn2 = [=] __cuda_callable__(const Grid2D::EntityType<2>&entity, int variant) mutable { + printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); + }; + + grid.forAll<0>(fn0, 0); + grid.forInterior<0>(fn0, 1); + grid.forBoundary<0>(fn0, 2); + + grid.forAll<1>(fn1, 3); + grid.forInterior<1>(fn1, 4); + grid.forBoundary<1>(fn1, 5); + + grid.forAll<2>(fn2, 6); + grid.forInterior<2>(fn2, 7); + grid.forBoundary<2>(fn2, 8); +} + +int main(int argc, char *argv[]) { + check2D(); return 0; } diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h index d11487b5d..3f9f6e9dd 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D.h +++ b/src/TNL/Meshes/GridDetails/Grid2D.h @@ -186,6 +186,24 @@ class Grid< 2, Real, Device, Index > __cuda_callable__ inline RealType getSmallestSpaceStep() const; + /** + * \brief Traverses all elements + */ + template + void forAll(Func func, FuncArgs... args) const; + + /** + * \brief Traversers interior elements + */ + template + void forInterior(Func func, FuncArgs... args) const; + + /** + * \brief Traversers boundary elements + */ + template + void forBoundary(Func func, FuncArgs... args) const; + void writeProlog( Logger& logger ) const; protected: diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index fc290d23d..4b5d80af7 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -414,6 +414,223 @@ Real Grid< 2, Real, Device, Index > :: getSmallestSpaceStep() const return min( this->spaceSteps.x(), this->spaceSteps.y() ); } +template< typename Real, + typename Device, + typename Index > +template +void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]"); + + auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates({ i, j }); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + TNL::Algorithms::ParallelFor2D::exec(0, 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...); + break; + case 1: { + auto outerOriented = [=] __cuda_callable__(Index i, Index j, + const Grid<2, Real, Device, Index>&grid, + const CoordinatesType & orientation, + const CoordinatesType & basis, + FuncArgs... args) mutable { + EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + + entity.refresh(); + + func(entity, args...); + }; + + TNL::Algorithms::ParallelFor2D::exec(0, 0, + dimensions.x() + 1, dimensions.y(), + outerOriented, + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); + + TNL::Algorithms::ParallelFor2D::exec(0, 0, + dimensions.x(), dimensions.y() + 1, + outerOriented, + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); + break; + } + case 2: + TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...); + break; + default: break; + } +} + +template< typename Real, + typename Device, + typename Index > +template +void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]"); + + auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates({ i, j }); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + TNL::Algorithms::ParallelFor2D::exec(1, 1, dimensions.x(), dimensions.y(), outer, *this, args...); + break; + case 1: { + auto outerOriented = [=] __cuda_callable__(Index i, Index j, + const Grid<2, Real, Device, Index>&grid, + const CoordinatesType& orientation, + const CoordinatesType& basis, + FuncArgs... args) mutable { + EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + + entity.refresh(); + + func(entity, args...); + }; + + TNL::Algorithms::ParallelFor2D::exec(1, 0, + dimensions.x(), dimensions.y(), + outerOriented, + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); + + TNL::Algorithms::ParallelFor2D::exec(0, 1, + dimensions.x(), dimensions.y(), + outerOriented, + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); + break; + } + case 2: + TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...); + break; + default: + break; + } +} + +template< typename Real, + typename Device, + typename Index > +template +void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const { + static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]"); + + auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable { + EntityType entity(grid); + + entity.setCoordinates({ i, j }); + entity.refresh(); + + func(entity, args...); + }; + + switch (EntityDimension) { + case 0: + // Lower horizontal + TNL::Algorithms::ParallelFor2D::exec(0, 0, dimensions.x() + 1, 1, outer, *this, args...); + // Upper horizontal + TNL::Algorithms::ParallelFor2D::exec(0, dimensions.y(), dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...); + // Left vertical + TNL::Algorithms::ParallelFor2D::exec(0, 0, 1, dimensions.y() + 1, outer, *this, args...); + // Right vertical + TNL::Algorithms::ParallelFor2D::exec(dimensions.x(), 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...); + break; + case 1: { + auto outerOriented = [=] __cuda_callable__(Index i, Index j, + const Grid<2, Real, Device, Index>&grid, + const CoordinatesType & orientation, + const CoordinatesType & basis, + FuncArgs... args) mutable { + EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + + entity.refresh(); + + func(entity, args...); + }; + + // Lower horizontal + TNL::Algorithms::ParallelFor2D::exec(0, 0, + dimensions.x() + 1, 1, + outerOriented, + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); + // Upper horizontal + TNL::Algorithms::ParallelFor2D::exec(0, dimensions.y() - 1, + dimensions.x() + 1, dimensions.y(), + outerOriented, + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); + // Left vertical + TNL::Algorithms::ParallelFor2D::exec(0, 0, + 1, dimensions.y(), + outerOriented, + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); + // Right vertical + TNL::Algorithms::ParallelFor2D::exec(dimensions.x() - 1, 0, + dimensions.x(), dimensions.y(), + outerOriented, + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); + break; + } + case 2: + if (localBegin < interiorBegin && interiorEnd < localEnd) { + // Lower horizontal + TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorBegin.y() + 1, outer, *this, args...); + // Upper horizontal + TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x() - 1, interiorEnd.y() - 1, interiorEnd.x() + 1, interiorEnd.y() + 1, outer, *this, args...); + // Left vertical + TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x() -1, interiorBegin.y() - 1, interiorBegin.x() + 1, interiorEnd.y() + 1, outer, *this, args...); + // Right vertical + TNL::Algorithms::ParallelFor2D::exec(interiorEnd.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorEnd.y() + 1, outer, *this, args...); + return; + } + + // Lower horizontal + TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), localBegin.y(), localEnd.x(), interiorBegin.y(), outer, *this, args...); + // Upper horizontal + TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), interiorEnd.y(), localEnd.x(), localEnd.y(), outer, *this, args...); + // Left vertical + TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), interiorBegin.y(), interiorBegin.x(), interiorEnd.y(), outer, *this, args...); + // Right vertical + TNL::Algorithms::ParallelFor2D::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...); + + break; + default: + break; + } +} + template< typename Real, typename Device, typename Index > -- GitLab From 8b76f89003553bcd77c35dcd989b515da9aee175 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 25 Nov 2021 14:16:11 +0100 Subject: [PATCH 19/29] Fix indexing of the NDim grid --- .../HeatEquationGrid/HeatmapNDimGrid/main.h | 182 ++++++++++-------- 1 file changed, 107 insertions(+), 75 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h index 40d7bef31..fbd536f8f 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h @@ -33,59 +33,20 @@ template::value>> class GridEntity { public: - struct Context { - public: - const int startOffset; - - const Container startPosition; - const Container traverseRectDimensions; - const Container direction; - - Context(const int startOffset, - const Container startPosition, - const Container traverseRectDimensions, - const Container direction): - startOffset(startOffset), - startPosition(startPosition), - traverseRectDimensions(traverseRectDimensions), - direction(direction) {} - }; - __cuda_callable__ - inline explicit GridEntity(const Context& context, - const Index& vertexIndex) : + inline explicit GridEntity(const Index& vertexIndex, + const Container& coordinates, + const Container& direction) : vertexIndex(vertexIndex), - context(context) { - this -> position = -1; - }; + coordinates(coordinates), + direction(direction) {}; __cuda_callable__ ~GridEntity() {}; __cuda_callable__ - inline Container getPosition() noexcept { - if (position[0] == -1) { - Container position = 0; - Index tmpOffset = vertexIndex - context.startOffset; - - Index dim = Dimension - 1; - - while (tmpOffset) { - Index newIndex = position[dim] + tmpOffset, dimension = context.traverseRectDimensions[dim]; - Index quotient = newIndex / dimension; - Index reminder = newIndex - (dimension * quotient); - - position[dim] = reminder; - tmpOffset = quotient; - - dim -= 1; - } - - for (Index i = 0; i < Dimension; i++) - this -> position[i] = position[i] + context.startPosition[i]; - } - - return position; + inline Container getCoordinates() const noexcept { + return coordinates; } __cuda_callable__ @@ -93,10 +54,9 @@ class GridEntity { return vertexIndex; } private: - Container position; - const Index vertexIndex; - const Context& context; + const Container coordinates; + const Container direction; }; template::value...>::value>, typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getEndindices(DimensionIndex... indices) const noexcept { + Container getEndIndices(DimensionIndex... indices) const noexcept { Container result{ indices... }; for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) @@ -227,25 +187,31 @@ class Grid { const TNL::Containers::Array, Device>& directions, Function function, FunctionArgs... args) const noexcept { - Index verticesCount = 1, offset = 0; - Container traverseRectDimensions, offsetPosition, multipliers; + Index verticesCount = 1; + Container traverseRectOrigin, traverseRectDimensions, + dimensionsProducts, traverseRectDimensionsProducts; for (Index i = 0; i < Dimension; i++) { traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1; verticesCount *= traverseRectDimensions[i]; - offsetPosition[i] = std::min(firstVertex[i], secondVertex[i]); - - multipliers[i] = i == 0 ? 1 : multipliers[i - 1] * dimensions[i]; - offset += offsetPosition[i] * multipliers[i]; + traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]); TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range"); TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range"); } - using Context = typename GridEntity::Context; + dimensionsProducts = getDimensionProducts(dimensions); + traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); - auto outerFunction = [function] __cuda_callable__(Index offset, const Context& context, FunctionArgs... args) mutable { - GridEntity entity{ context, context.startOffset + offset }; + auto outerFunction = [=] __cuda_callable__(Index offset, + const Container& traverseRectOrigin, + const Container& traverseRectDimensions, + const Container& traverseRectDimensionsProducts, + const Container& dimensionsProducts, + FunctionArgs... args) mutable { + auto entity = this -> makeEntitity(offset, + traverseRectOrigin, traverseRectDimensions, + traverseRectDimensionsProducts, dimensionsProducts); function(entity, args...); }; @@ -253,9 +219,12 @@ class Grid { Index lowerBound = 0, upperBound = verticesCount; for (Index i = 0; i < directions.getSize(); i++) { - const Context context{ offset, offsetPosition, traverseRectDimensions, directions[i] }; - - TNL::Algorithms::ParallelFor::exec(lowerBound, upperBound, outerFunction, context, args...); + TNL::Algorithms::ParallelFor::exec(lowerBound, upperBound, outerFunction, + traverseRectOrigin, + traverseRectDimensions, + traverseRectDimensionsProducts, + dimensionsProducts, + args...); } } private: @@ -308,6 +277,73 @@ class Grid { } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end())); } } + + __cuda_callable__ inline + GridEntity makeEntitity(const Index index, + const Container& traverseRectOrigin, + const Container& traverseRectDimensions, + const Container& traverseRectDimensionsProducts, + const Container& dimensionsProducts) const { + const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); + Container globalCoordinates = 0; + + for (Index i = 0; i < Dimension; i++) + globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i]; + + const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts); + + return GridEntity(globalIndex, globalCoordinates, {}); + } + /** + * Calculates position in the specific boundaries + */ + __cuda_callable__ inline + Container getCoordinates(const Index index, const Container &dimensions) const { + Container coordinates = 0; + Index tmpIndex = index; + + Index dim = Dimension - 1; + + // TODO: - Implement overflow check. + while (tmpIndex) { + Index dimension = dimensions[dim], + quotient = tmpIndex / dimension, + reminder = tmpIndex - (dimension * quotient); + + coordinates[dim] = reminder; + tmpIndex = quotient; + + dim -= 1; + } + + return coordinates; + } + /** + * Calculates product matrix based on the dimension + */ + __cuda_callable__ inline + Container getDimensionProducts(const Container& dimensions) const noexcept { + Container products = 0; + + for (Index i = Dimension; i > 0; i--) + products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; + + return products; + } + /** + * Calculates index based on the dimension + */ + __cuda_callable__ inline + Index getIndex(const Container& coordinates, + const Container& dimensionProducts) const { + Index index = 0; + + for (Index i = 0; i < Dimension; i++) + index += coordinates[i] * dimensionProducts[i]; + + return index; + } + }; template @@ -335,7 +371,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c aux = 0; auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { - auto position = entity.getPosition(); + auto position = entity.getCoordinates(); auto index = entity.getIndex(); auto x = position[0] * hx - params.xDomainSize / 2; @@ -383,6 +419,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c return writeGNUPlot("data_final.txt", params, ux); }; + int main(int argc, char* argv[]) { using Real = double; @@ -408,25 +445,20 @@ int main(int argc, char* argv[]) { return EXIT_SUCCESS; } + /* int main(int argc, char *argv[]) { - Grid<3, int, TNL::Devices::Host> grid; - - grid.setDimensions(3, 3, 3); - - auto fn = [=] __cuda_callable__ (int index) { - //printf("%d\n", index); - }; + Grid<2, int, TNL::Devices::Cuda> grid; - grid.traverseAll(fn); + grid.setDimensions(5, 5); - auto fn_entity = [=] __cuda_callable__ (GridEntity<3, int> entity) { - printf("%d %d %d \n", entity.getPosition()[0], entity.getPosition()[1], entity.getPosition()[2]); + auto fn_entity = [=] __cuda_callable__ (GridEntity<2, int> entity) { + printf("%d %d %d\n", entity.getCoordinates()[0], entity.getCoordinates()[1], entity.getIndex()); }; - Container<3, int> direction { 0, 0, 0 }; + Container<2, int> direction { 0, 0 }; - grid.traverse({ 1, 1, 1 }, { 1, 1, 1 }, { direction }, fn_entity); + grid.traverse({ 1, 1 }, { 4, 4 }, { direction }, fn_entity); return 0; } -- GitLab From c77d6ff52d75467e1b3df76245b8bf66fa66bdc5 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 25 Nov 2021 14:21:57 +0100 Subject: [PATCH 20/29] Remove distributed grid indexing from 1D grid --- src/TNL/Meshes/GridDetails/Grid1D_impl.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h index 8d73408fa..03886faf7 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h @@ -357,7 +357,10 @@ void Grid<1, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { TNL::Algorithms::ParallelFor::exec(0, dimensions.x() + 1, outer, *this, args...); break; case 1: - TNL::Algorithms::ParallelFor::exec(localBegin.x(), localEnd.x(), outer, *this, args...); + //TODO: - Update for distributed grid + //TNL::Algorithms::ParallelFor::exec(localBegin.x(), localEnd.x(), outer, *this, args...); + + TNL::Algorithms::ParallelFor::exec(0, dimensions.x(), outer, *this, args...); break; default: break; } @@ -381,11 +384,16 @@ void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons switch (EntityDimension) { case 0: - outer(0, *this, args...); - outer(getDimensions().x(), *this, args...); + // TODO: - Implement call within a single kernel + TNL::Algorithms::ParallelFor::exec(0, 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(getDimensions().x(), getDimensions().x() + 1, outer, *this, args...); break; case 1: - if (localBegin < interiorBegin && interiorEnd < localEnd) { + TNL::Algorithms::ParallelFor::exec(0, 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(getDimensions().x() - 1, getDimensions().x(), outer, *this, args...); + + // TODO: - Verify for distributed grid + /*if (localBegin < interiorBegin && interiorEnd < localEnd) { outer(interiorBegin.x() - 1, *this, args...); outer(interiorEnd.x(), *this, args...); break; @@ -397,7 +405,7 @@ void Grid<1, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons } if (interiorEnd < localEnd) - outer(interiorEnd.x(), *this, args...); + outer(interiorEnd.x(), *this, args...);*/ break; default: break; } @@ -424,7 +432,10 @@ void Grid<1, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons TNL::Algorithms::ParallelFor::exec(1, dimensions.x(), outer, *this, args...); break; case 1: - TNL::Algorithms::ParallelFor::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(1, dimensions.x() - 1, outer, *this, args...); + + // TODO: - Verify for distributed grids + //TNL::Algorithms::ParallelFor::exec(interiorBegin.x(), interiorEnd.x(), outer, *this, args...); break; default: break; } -- GitLab From 0829a319f81da5d190d9512a2ebcaf3a07cf104e Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 25 Nov 2021 14:22:59 +0100 Subject: [PATCH 21/29] Remove neighbour grid entities/get neighbours storage --- src/TNL/Meshes/GridDetails/GridEntity_impl.h | 85 ++------------------ src/TNL/Meshes/GridEntity.h | 27 ------- 2 files changed, 5 insertions(+), 107 deletions(-) diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h index fb5548f35..d91b5f449 100644 --- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h +++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h @@ -19,17 +19,6 @@ namespace TNL { namespace Meshes { -/*template< int Dimension, - typename Real, - typename Device, - typename Index, typename Config, - int EntityDimension > -__cuda_callable__ inline -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension >:: -GridEntity() -{ -}*/ - template< int Dimension, typename Real, typename Device, @@ -43,8 +32,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid ) entityIndex( -1 ), coordinates( 0 ), orientation( 0 ), - basis( 0 ), - neighborEntitiesStorage( *this ) + basis( 0 ) { } @@ -64,8 +52,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid, entityIndex( -1 ), coordinates( coordinates ), orientation( orientation ), - basis( basis ), - neighborEntitiesStorage( *this ) + basis( basis ) { } @@ -123,7 +110,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con refresh() { this->entityIndex = this->grid.getEntityIndex( *this ); - this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex ); } template< int Dimension, @@ -205,21 +191,6 @@ setBasis( const EntityBasisType& basis ) this->orientation = EntityOrientationType( 1 ) - abs( basis ); } -template< int Dimension, - typename Real, - typename Device, - typename Index, - int EntityDimension, - typename Config > - template< int NeighborEntityDimension > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::template NeighborEntities< NeighborEntityDimension >& -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >:: -getNeighborEntities() const -{ - return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >(); -} - template< int Dimension, typename Real, typename Device, @@ -276,19 +247,6 @@ getMesh() const return this->grid; } -/**** - * Specialization for cells - */ -/*template< int Dimension, - typename Real, - typename Device, - typename Index > -__cuda_callable__ inline -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension >:: -GridEntity() -{ -}*/ - template< int Dimension, typename Real, typename Device, @@ -298,8 +256,7 @@ __cuda_callable__ inline GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:: GridEntity( const GridType& grid ) : grid( grid ), - entityIndex( -1 ), - neighborEntitiesStorage( *this ) + entityIndex( -1 ) { this->coordinates = CoordinatesType( ( Index ) 0 ); } @@ -317,8 +274,7 @@ GridEntity( const GridType& grid, const EntityBasisType& basis ) : grid( grid ), entityIndex( -1 ), - coordinates( coordinates ), - neighborEntitiesStorage( *this ) + coordinates( coordinates ) { } @@ -372,7 +328,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >: refresh() { this->entityIndex = this->grid.getEntityIndex( *this ); - this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex ); } template< int Dimension, @@ -419,20 +374,6 @@ getBasis() const return EntityBasisType( ( IndexType ) 1 ); } -template< int Dimension, - typename Real, - typename Device, - typename Index, - typename Config > - template< int NeighborEntityDimension > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::template NeighborEntities< NeighborEntityDimension >& -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:: -getNeighborEntities() const -{ - return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >(); -} - template< int Dimension, typename Real, typename Device, @@ -531,8 +472,7 @@ GridEntity( const GridType& grid, const EntityBasisType& basis ) : grid( grid ), entityIndex( -1 ), - coordinates( coordinates ), - neighborEntitiesStorage( *this ) + coordinates( coordinates ) { } @@ -586,7 +526,6 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >:: refresh() { this->entityIndex = this->grid.getEntityIndex( *this ); - this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex ); } template< int Dimension, @@ -635,20 +574,6 @@ getBasis() const return EntityBasisType( ( IndexType ) 0 ); } -template< int Dimension, - typename Real, - typename Device, - typename Index, - typename Config > - template< int NeighborEntityDimension > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::template NeighborEntities< NeighborEntityDimension >& -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >:: -getNeighborEntities() const -{ - return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >(); -} - template< int Dimension, typename Real, typename Device, diff --git a/src/TNL/Meshes/GridEntity.h b/src/TNL/Meshes/GridEntity.h index 573089dc3..19e13c531 100644 --- a/src/TNL/Meshes/GridEntity.h +++ b/src/TNL/Meshes/GridEntity.h @@ -60,8 +60,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; typedef typename GridType::PointType PointType; - typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType; - template< int NeighborEntityDimension = getEntityDimension() > using NeighborEntities = NeighborGridEntityGetter< @@ -112,11 +110,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio __cuda_callable__ inline void setBasis( const EntityBasisType& basis ); - template< int NeighborEntityDimension = getEntityDimension() > - __cuda_callable__ inline - const NeighborEntities< NeighborEntityDimension >& - getNeighborEntities() const; - __cuda_callable__ inline bool isBoundaryEntity() const; @@ -141,8 +134,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio EntityBasisType basis; - NeighborGridEntitiesStorageType neighborEntitiesStorage; - //__cuda_callable__ inline //GridEntity(); @@ -175,10 +166,8 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con constexpr static int getEntityDimension() { return getMeshDimension(); }; - typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType; typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; - typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType; template< int NeighborEntityDimension = getEntityDimension() > using NeighborEntities = @@ -231,11 +220,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con __cuda_callable__ inline void setBasis( const EntityBasisType& basis ){}; - template< int NeighborEntityDimension = Dimension > - __cuda_callable__ inline - const NeighborEntities< NeighborEntityDimension >& - getNeighborEntities() const; - __cuda_callable__ inline bool isBoundaryEntity() const; @@ -259,8 +243,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con CoordinatesType coordinates; - NeighborGridEntitiesStorageType neighborEntitiesStorage; - //__cuda_callable__ inline //GridEntity(); @@ -295,7 +277,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType; typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; - typedef NeighborGridEntitiesStorage< GridEntity, Config > NeighborGridEntitiesStorageType; template< int NeighborEntityDimension = getEntityDimension() > using NeighborEntities = @@ -348,12 +329,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > __cuda_callable__ inline void setBasis( const EntityBasisType& basis ){}; - - template< int NeighborEntityDimension = getEntityDimension() > - __cuda_callable__ inline - const NeighborEntities< NeighborEntityDimension >& - getNeighborEntities() const; - __cuda_callable__ inline bool isBoundaryEntity() const; @@ -381,8 +356,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > CoordinatesType coordinates; - NeighborGridEntitiesStorageType neighborEntitiesStorage; - friend class BoundaryGridEntityChecker< GridEntity >; friend class GridEntityCenterGetter< GridEntity >; -- GitLab From 7d87876180c14769353058afbabb789b6d5b5c2a Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 25 Nov 2021 23:18:41 +0100 Subject: [PATCH 22/29] Update Grid2D to efficiently process boundary entities --- src/TNL/Meshes/GridDetails/Grid2D.h | 4 +- src/TNL/Meshes/GridDetails/Grid2D_impl.h | 129 ++++++++++++------- src/TNL/Meshes/GridDetails/GridEntity_impl.h | 4 +- 3 files changed, 90 insertions(+), 47 deletions(-) diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h index 3f9f6e9dd..18a21c0ea 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D.h +++ b/src/TNL/Meshes/GridDetails/Grid2D.h @@ -124,7 +124,7 @@ class Grid< 2, Real, Device, Index > */ template< int EntityDimension > __cuda_callable__ - IndexType getEntitiesCount() const; + Index getEntitiesCount() const; /** * \brief Gets number of entities in this grid. @@ -132,7 +132,7 @@ class Grid< 2, Real, Device, Index > */ template< typename Entity > __cuda_callable__ - inline IndexType getEntitiesCount() const; + inline Index getEntitiesCount() const; /** * \brief See Grid1D::getEntity(). diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index 4b5d80af7..65da95107 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -465,7 +465,10 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { break; } case 2: - TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...); + TNL::Algorithms::ParallelFor2D::exec(0, 0, dimensions.x(), dimensions.y(), outer, *this, args...); + + // TODO: - Verify for distributed grids + //TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), localBegin.y(), localEnd.x(), localEnd.y(), outer, * this, args...); break; default: break; } @@ -493,7 +496,7 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons break; case 1: { auto outerOriented = [=] __cuda_callable__(Index i, Index j, - const Grid<2, Real, Device, Index>&grid, + Grid<2, Real, Device, Index>& grid, const CoordinatesType& orientation, const CoordinatesType& basis, FuncArgs... args) mutable { @@ -522,7 +525,10 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons break; } case 2: - TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...); + TNL::Algorithms::ParallelFor2D::exec(1, 1, dimensions.x() - 1, dimensions.y() - 1, outer, *this, args...); + + // TODO: - Verify for distributed grids + //TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x(), interiorBegin.y(), interiorEnd.x(), interiorEnd.y(), outer, *this, args...); break; default: break; @@ -536,10 +542,19 @@ template void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) const { static_assert(EntityDimension >= 0 && EntityDimension <= 2, "Entity dimension must be in range [0..<2]"); - auto outer = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable { + auto outer = [=] __cuda_callable__(Index i, Index axis, Index axisIndex, const Grid<2, Real, Device, Index>&grid, FuncArgs... args) mutable { EntityType entity(grid); - entity.setCoordinates({ i, j }); + switch (axis) { + case 0: + entity.setCoordinates({ axisIndex, i }); + break; + case 1: + entity.setCoordinates({ i, axisIndex }); + break; + default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]"); + } + entity.refresh(); func(entity, args...); @@ -548,21 +563,37 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons switch (EntityDimension) { case 0: // Lower horizontal - TNL::Algorithms::ParallelFor2D::exec(0, 0, dimensions.x() + 1, 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(0, dimensions.x() + 1, outer, 0, 0, *this, args...); // Upper horizontal - TNL::Algorithms::ParallelFor2D::exec(0, dimensions.y(), dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(0, dimensions.x() + 1, outer, 0, dimensions.y(), *this, args...); // Left vertical - TNL::Algorithms::ParallelFor2D::exec(0, 0, 1, dimensions.y() + 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(0, dimensions.y() + 1, outer, 1, 0, *this, args...); // Right vertical - TNL::Algorithms::ParallelFor2D::exec(dimensions.x(), 0, dimensions.x() + 1, dimensions.y() + 1, outer, *this, args...); + TNL::Algorithms::ParallelFor::exec(0, dimensions.y() + 1, outer, 1, dimensions.x(), *this, args...); break; case 1: { - auto outerOriented = [=] __cuda_callable__(Index i, Index j, - const Grid<2, Real, Device, Index>&grid, + auto outerOriented = [=] __cuda_callable__(Index i, + Index axis, + Index axisIndex, + const Grid<2, Real, Device, Index>& grid, const CoordinatesType & orientation, const CoordinatesType & basis, FuncArgs... args) mutable { - EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + CoordinatesType coordinates; + + switch (axis) { + case 0: + coordinates[0] = axisIndex; + coordinates[1] = i; + break; + case 1: + coordinates[0] = i; + coordinates[1] = axisIndex; + break; + default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]"); + } + + EntityType entity(grid, coordinates, orientation, basis); entity.refresh(); @@ -570,41 +601,54 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons }; // Lower horizontal - TNL::Algorithms::ParallelFor2D::exec(0, 0, - dimensions.x() + 1, 1, - outerOriented, - *this, - CoordinatesType(1, 0), - CoordinatesType(0, 1), - args...); + TNL::Algorithms::ParallelFor::exec(0, + dimensions.x(), + outerOriented, + 0, 0, + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); // Upper horizontal - TNL::Algorithms::ParallelFor2D::exec(0, dimensions.y() - 1, - dimensions.x() + 1, dimensions.y(), - outerOriented, - *this, - CoordinatesType(1, 0), - CoordinatesType(0, 1), - args...); + TNL::Algorithms::ParallelFor::exec(0, + dimensions.x(), + outerOriented, + 0, dimensions.y(), + *this, + CoordinatesType(1, 0), + CoordinatesType(0, 1), + args...); // Left vertical - TNL::Algorithms::ParallelFor2D::exec(0, 0, - 1, dimensions.y(), - outerOriented, - *this, - CoordinatesType(0, 1), - CoordinatesType(1, 0), - args...); + TNL::Algorithms::ParallelFor::exec(0, + dimensions.y(), + outerOriented, + 1, 0, + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); // Right vertical - TNL::Algorithms::ParallelFor2D::exec(dimensions.x() - 1, 0, - dimensions.x(), dimensions.y(), - outerOriented, - *this, - CoordinatesType(0, 1), - CoordinatesType(1, 0), - args...); + TNL::Algorithms::ParallelFor::exec(0, + dimensions.y(), + outerOriented, + 1, dimensions.x(), + *this, + CoordinatesType(0, 1), + CoordinatesType(1, 0), + args...); break; } case 2: - if (localBegin < interiorBegin && interiorEnd < localEnd) { + // Lower horizontal + TNL::Algorithms::ParallelFor::exec(0, dimensions.x(), outer, 0, 0, *this, args...); + // Upper horizontal + TNL::Algorithms::ParallelFor::exec(0, dimensions.x(), outer, 0, dimensions.y() - 1, *this, args...); + // Left vertical + TNL::Algorithms::ParallelFor::exec(0, dimensions.y(), outer, 1, 0, *this, args...); + // Right vertical + TNL::Algorithms::ParallelFor::exec(0, dimensions.y(), outer, 1, dimensions.x() - 1, *this, args...); + // TODO: - Verify with the distributed grid + /*if (localBegin < interiorBegin && interiorEnd < localEnd) { // Lower horizontal TNL::Algorithms::ParallelFor2D::exec(interiorBegin.x() - 1, interiorBegin.y() - 1, interiorEnd.x() + 1, interiorBegin.y() + 1, outer, *this, args...); // Upper horizontal @@ -623,8 +667,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons // Left vertical TNL::Algorithms::ParallelFor2D::exec(localBegin.x(), interiorBegin.y(), interiorBegin.x(), interiorEnd.y(), outer, *this, args...); // Right vertical - TNL::Algorithms::ParallelFor2D::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...); - + TNL::Algorithms::ParallelFor2D::exec(interiorEnd.x(), interiorBegin.y(), localEnd.x(), interiorEnd.y(), outer, *this, args...);*/ break; default: break; diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h index d91b5f449..922673de5 100644 --- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h +++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h @@ -454,11 +454,11 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >:: GridEntity( const GridType& grid ) : grid( grid ), entityIndex( -1 ), - coordinates( 0 ), - neighborEntitiesStorage( *this ) + coordinates( 0 ) { } +// Basis is Discarded template< int Dimension, typename Real, typename Device, -- GitLab From 7cf225c3728d37cdbc1563cadcd52d65ec3e510f Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 25 Nov 2021 23:23:41 +0100 Subject: [PATCH 23/29] Implement HeatMap for TNL grid --- .../HeatEquationGrid/HeatmapTNLGrid/main.h | 109 +++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h index 45154f05c..82b27213d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h @@ -1,8 +1,110 @@ #pragma once -#include "TNL/Meshes/GridDetails/Grid1D.h" +#include "TNL/Meshes/GridDetails/Grid2D.h" +#include "../Base/HeatmapSolver.h" +template +template +bool HeatmapSolver::solve(const HeatmapSolver::Parameters& params) const { + using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>; + + Grid2D grid; + + // Grid implementation defines its dimensions in the amount of edges. + // To align it size to all other benchmarks substract 1 + grid.setDimensions(params.xSize - 1, params.ySize - 1); + + // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well + int verticesCount = grid.template getEntitiesCount<0>(); + + TNL::Containers::Array ux(verticesCount), // data at step u + aux(verticesCount);// data at step u + 1 + + // Invalidate ux/aux + ux = 0; + aux = 0; + + const Real hx = params.xDomainSize / (Real)params.xSize; + const Real hy = params.yDomainSize / (Real)params.ySize; + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); + + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + + auto uxView = ux.getView(), + auxView = aux.getView(); + + auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable { + auto index = entity.getIndex(); + + auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2; + auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2; + + uxView[index] = exp(params.sigma * (x * x + y * y)); + }; + + grid.template forInterior<0>(init); + + if (!writeGNUPlot("data.txt", params, ux)) + return false; + + auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { + auto index = entity.getIndex(); + auto width = grid.getDimensions().x() + 1; + + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv; + }; + + auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { + auto index = entity.getIndex(); + + uxView[index] += auxView[index] * timestep; + }; + + Real start = 0; + + while (start < params.finalTime) { + grid.template forInterior<0>(next); + grid.template forInterior<0>(update); + + start += timestep; + } + + return writeGNUPlot("data_final.txt", params, ux); + + return false; +} + +int main(int argc, char* argv[]) { + using Real = double; + + auto config = HeatmapSolver::Parameters::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); + + HeatmapSolver solver; + + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; + +#ifdef HAVE_CUDA + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; +#endif + + return EXIT_SUCCESS; +} + + +// TODO: - Move to tests +/* using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>; using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>; @@ -45,6 +147,8 @@ void check2D() { printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); }; + std::cout << "Entities count of 0: dimension" << grid.getEntitiesCount<0>() << std::endl; + grid.forAll<0>(fn0, 0); grid.forInterior<0>(fn0, 1); grid.forBoundary<0>(fn0, 2); @@ -59,7 +163,6 @@ void check2D() { } int main(int argc, char *argv[]) { - check2D(); return 0; } - +*/ -- GitLab From a36658056e448483f1ac591e099b646c03bef66b Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Tue, 30 Nov 2021 12:57:34 +0100 Subject: [PATCH 24/29] WIP: Work on Benchmarks for HeatMap grid --- .../HeatEquationGrid/Base/HeatmapSolver.h | 21 +- .../Base/HeatmapSolverBenchmark.h | 172 +++++++ .../HeatmapNDimGrid/CMakeLists.txt | 4 +- .../HeatmapNDimGrid/implementation.h | 420 ++++++++++++++++++ .../HeatEquationGrid/HeatmapNDimGrid/main.h | 413 +---------------- 5 files changed, 612 insertions(+), 418 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h index 92a2b4611..6f3411268 100644 --- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h @@ -21,16 +21,27 @@ public: const Real xDomainSize, yDomainSize; const Real sigma; const Real timeStep, finalTime; + const bool outputData; const bool verbose; Parameters(const TNL::Config::ParameterContainer ¶meters); + Parameters(const int xSize, const int ySize, + const Real xDomainSize, const Real yDomainSize, + const Real sigma, + const Real timeStep, const Real finalTime, + const bool outputData, + const bool verbose): + xSize(xSize), ySize(ySize), + xDomainSize(xDomainSize), yDomainSize(yDomainSize), + sigma(sigma), + timeStep(timeStep), finalTime(finalTime), + outputData(outputData), verbose(verbose) {} static TNL::Config::ConfigDescription makeInputConfig(); }; template bool solve(const Parameters ¶meters) const; - private: template bool writeGNUPlot(const std::string &filename, @@ -49,7 +60,6 @@ TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig( #ifdef HAVE_CUDA config.addEntryEnum("cuda"); #endif - config.addEntry("grid-x-size", "Grid size along x-axis.", 100); config.addEntry("grid-y-size", "Grid size along y-axis.", 100); @@ -73,14 +83,17 @@ HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContaine sigma(parameters.getParameter("sigma")), timeStep(parameters.getParameter("time-step")), finalTime(parameters.getParameter("final-time")), + outputData(parameters.getParameter("outputData")) verbose(parameters.getParameter("verbose")) {} template template bool HeatmapSolver::writeGNUPlot(const std::string &filename, const HeatmapSolver::Parameters ¶ms, - const TNL::Containers::Array &map) const -{ + const TNL::Containers::Array &map) const { + if (!params.outputData) + return true; + std::ofstream out(filename, std::ios::out); if (!out.is_open()) diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h new file mode 100644 index 000000000..2d7fd0504 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h @@ -0,0 +1,172 @@ + +#include + +#include +#include + +#include + +#include "HeatmapSolver.h" + +class HeatmapSolverBenchmark { + public: + template + void exec(const HeatmapSolver::Parameters& params) const; + + template< typename Real > + void runBenchmark(const Benchmark& benchmark, + const Benchmark::MetadataMap, + const int minXDimension, + const int maxXDimension, + const int xStepSizeFactor, + const int minYDimension, + const int maxYDimension, + const int yStepSizeFactor, + const TNL::Config::ParameterContainer& parameters) const; + + static TNL::Config::ConfigDescription makeInputConfig(); +}; + +TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { + TNL::Config::ConfigDescription config; + + config.addDelimiter("Benchmark settings:"); + config.addEntry("log-file", "Log file name.", "tnl-benchmark-heatmap.log"); + config.addEntry("output-mode", "Mode for opening the log file.", "overwrite"); + config.addEntryEnum("append"); + config.addEntryEnum("overwrite"); + + config.addEntry("precision", "Precision of the arithmetics.", "double"); + config.addEntryEnum("float"); + config.addEntryEnum("double"); + config.addEntryEnum("all"); + + config.addEntry("min-x-dimension", "Minimum dimension over x axis used in the benchmark.", 100); + config.addEntry("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 10000); + config.addEntry("x-size-step-factor", "Factor determining the dimension grows over x axis. First size is min-x-dimension and each following size is stepFactor*previousSize, up to max-x-dimension.", 2); + + config.addEntry("min-y-dimension", "Minimum dimension over x axis used in the benchmark.", 100); + config.addEntry("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 10000); + config.addEntry("y-size-step-factor", "Factor determining the dimension grows over y axis. First size is min-y-dimension and each following size is stepFactor*previousSize, up to max-y-dimension.", 2); + + config.addEntry("loops", "Number of iterations for every computation.", 10); + + config.addEntry("verbose", "Verbose mode.", 1); + + config.addDelimiter("Problem settings:"); + config.addEntry("domain-x-size", "Domain size along x-axis.", 2.0); + config.addEntry("domain-y-size", "Domain size along y-axis.", 2.0); + + config.addEntry("sigma", "Sigma in exponential initial condition.", 1.0); + + config.addEntry("time-step", "Time step. By default it is proportional to one over space step square.", 0.0); + config.addEntry("final-time", "Final time of the simulation.", 0.012); + + config.addDelimiter("Device settings:"); + TNL::Devices::Host::configSetup( config ); + TNL::Devices::Cuda::configSetup( config ); +} + +template +void HeatmapSolverBenchmark::runBenchmark(Benchmark& benchmark, + Benchmark::MetadataMap metadata, + const int minXDimension, + const int maxXDimension, + const int xStepSizeFactor, + const int minYDimension, + const int maxYDimension, + const int yStepSizeFactor, + const TNL::Config::ParameterContainer& parameters) const { + const TNL::String precision = getType< Real >(); + metadata["precision"] = precision; + + benchmark.newBenchmark(String("Heatmap grid with (") + precision + ", host allocator" + Device + ")"); + + auto xDomainSize = parameters.getParameter("domain-x-size"); + auto yDomainSize = parameters.getParameter("domain-y-size"); + + auto sigma = parameters.getParameter("sigma"); + + auto timeStep = parameters.getParameter("time-step"); + + auto finalTime = parameters.getParameter("final-time"); + + + for(std::size_t xSize = minXDimension; xSize <= maxXDimension; size *= xStepSizeFactor) { + for(std::size_t ySize = minXDimension; ySize <= maxXDimension; size *= yStepSizeFactor) { + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "xSize", convertToString(xSize) }, + { "ySize", convertToString(ySize) } + })); + + auto lambda = [=]() { + HeatmapSolver::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false); + + exec(params); + }; + + benchmark.time() + } + } +} + +int main(int argc, char* argv[]) { + HeatmapSolverBenchmark solver; + + auto config = HeatmapSolverBenchmark::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + if (!TNL::Devices::Host::setup( parameters ) || !TNL::Devices::Cuda::setup( parameters ) ) + return EXIT_FAILURE; + + const TNL::String logFileName = parameters.getParameter( "log-file" ); + const TNL::String outputMode = parameters.getParameter( "output-mode" ); + const TNL::String precision = parameters.getParameter( "precision" ); + + const std::size_t minXDimension = parameters.getParameter("min-x-dimension"); + const std::size_t maxXDimension = parameters.getParameter("max-x-dimension"); + const std::size_t xSizeStepFactor = parameters.getParameter("x-size-step-factor"); + + if(xSizeStepFactor <= 1) { + std::cerr << "The value of --x-size-step-factor must be greater than 1." << std::endl; + return EXIT_FAILURE; + } + + const std::size_t minYDimension = parameters.getParameter("min-y-dimension"); + const std::size_t maxYDimension = parameters.getParameter("max-y-dimension"); + const std::size_t ySizeStepFactor = parameters.getParameter("y-size-step-factor"); + + const int loops = parameters.getParameter< int >("loops"); + const int verbose = parameters.getParameter< int >("verbose"); + + if(ySizeStepFactor <= 1) { + std::cerr << "The value of --y-size-step-factor must be greater than 1." << std::endl; + return EXIT_FAILURE; + } + + auto mode = std::ios::out; + + if( outputMode == "append" ) + mode |= std::ios::app; + + std::ofstream logFile( logFileName.getString(), mode ); + + TNL::Benchmarks::Benchmark benchmark(loops, verbose); + + if( precision == "all" || precision == "float" ) + solver.runBenchmark(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters); + if( precision == "all" || precision == "double" ) + solver.runBenchmark(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters); + + if(!benchmark.save(logFile)) { + std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt index a17375117..d493b57b4 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cu ) + CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cu ) ELSE() - add_executable( benchmark_heat_n_dim_grid ../Base/HeatmapSolver.h main.h main.cpp ) + add_executable( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h new file mode 100644 index 000000000..ed8ea004d --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h @@ -0,0 +1,420 @@ + +#include +#include +#include +#include +#include +#include + +#include "../Base/HeatmapSolver.h" + +#include +#include +#include +#include +#include + +#pragma once + +template struct bool_pack {}; + +template +using conjunction = std::is_same, bool_pack>; + +template 0)>, + typename = std::enable_if_t::value>> +using Container = TNL::Containers::StaticArray; + +template 0)>, + typename = std::enable_if_t::value>> +class GridEntity { + public: + __cuda_callable__ + inline explicit GridEntity(const Index& vertexIndex, + const Container& coordinates, + const Container& direction) : + vertexIndex(vertexIndex), + coordinates(coordinates), + direction(direction) {}; + + __cuda_callable__ + ~GridEntity() {}; + + __cuda_callable__ + inline Container getCoordinates() const noexcept { + return coordinates; + } + + __cuda_callable__ + inline Index getIndex() const noexcept { + return vertexIndex; + } + private: + const Index vertexIndex; + const Container coordinates; + const Container direction; +}; + +template 0)>> +class Grid { + public: + Grid() {} + ~Grid() {} + + /** + * @brief - Specifies dimensions of the grid + * @param[in] dimensions - A parameter pack, which specifies points count in the specific dimension. + * Most significant dimension is in the beginning of the list. + * Least significant dimension is in the end of the list + */ + template ::value...>::value>, + typename = std::enable_if_t> + void setDimensions(Dimensions... dimensions) noexcept { + Index i = 0; + + for (auto x: { dimensions... }) { + TNL_ASSERT_GT(x, 0, "Dimension must be positive"); + this -> dimensions[i] = x; + i++; + } + + refreshDimensionMaps(); + } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getDimension(Index index) const noexcept { + TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension"); + + return dimensions[index]; + } + /** + * @param[in] indices - A dimension index pack + */ + template ::value...>::value>, + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + Container getDimensions(DimensionIndex... indices) const noexcept { + Container result{indices...}; + + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this -> getDimension(result[i]); + + return result; + } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept { + TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); + + return cumulativeDimensionMap(index); + } + /** + * @brief - Returns the number of entities of specific dimension + */ + template ::value...>::value>, + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + Container getEntitiesCounts(DimensionIndex... indices) const noexcept { + Container result{indices...}; + + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this -> getEntitiesCount(result[i]); + + return result; + } + /** + * @param[in] index - index of dimension + */ + inline __cuda_callable__ Index getEndIndex(Index index) const noexcept { + TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); + TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension"); + + return this -> getDimension(index) - 1; + } + /** + * @brief - Returns the last index of specific dimensions + */ + template ::value...>::value>, + typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> + Container getEndIndices(DimensionIndex... indices) const noexcept { + Container result{ indices... }; + + for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) + result[i] = this->getEndIndex(result[i]); + + return result; + } + /** + * @brief - Traversers all elements in the grid + */ + template + void traverseAll(Function function, FunctionArgs... args) const noexcept { + auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable { + function(index, args...); + }; + + TNL::Algorithms::ParallelFor::exec(0, cumulativeDimensionMap[0], lambda, args...); + } + /** + * @brief - Traverses a grid from start index to end index. + * + * @param[in] start - a start index of point + * @param[in] end - an end index of point + * @param[in] directions - A pack of boolean vector flags with the size of the dimension. + * For example, let's have the 3-dimensional grid. + * A pack {false, false, false} will call function for all points + * A pack {true, false, false} will call function for edges directed over dimension at index of true + * A pack {true, true, false} will call function for faces directed over dimension at index of true + * A pack {true, true, true} will call function for cells directed over + * + */ + template + void traverse(const Container& firstVertex, + const Container& secondVertex, + const TNL::Containers::Array, Device>& directions, + Function function, + FunctionArgs... args) const noexcept { + Index verticesCount = 1; + Container traverseRectOrigin, traverseRectDimensions, + dimensionsProducts, traverseRectDimensionsProducts; + + for (Index i = 0; i < Dimension; i++) { + traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1; + verticesCount *= traverseRectDimensions[i]; + traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]); + + TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range"); + TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range"); + } + + dimensionsProducts = getDimensionProducts(dimensions); + traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); + + auto outerFunction = [=] __cuda_callable__(Index offset, + const Container& traverseRectOrigin, + const Container& traverseRectDimensions, + const Container& traverseRectDimensionsProducts, + const Container& dimensionsProducts, + FunctionArgs... args) mutable { + auto entity = this -> makeEntitity(offset, + traverseRectOrigin, traverseRectDimensions, + traverseRectDimensionsProducts, dimensionsProducts); + + function(entity, args...); + }; + + Index lowerBound = 0, upperBound = verticesCount; + + for (Index i = 0; i < directions.getSize(); i++) { + TNL::Algorithms::ParallelFor::exec(lowerBound, upperBound, outerFunction, + traverseRectOrigin, + traverseRectDimensions, + traverseRectDimensionsProducts, + dimensionsProducts, + args...); + } + } + private: + Container dimensions; + /** + * @brief - A dimension map is a store for dimension limits over all combinations of basis. + * First, (n choose 0) elements will contain the count of 0 dimension elements + * Second, (n choose 1) elements will contain the count of 1-dimension elements + * .... + * + * For example, let's have a 3-d grid, then the map indexing will be the next: + * 0 - 0 - count of vertices + * 1, 2, 3 - count of edges in z, y, x plane + * 4, 5, 6 - count of faces in yz, xz, xy plane + * 7 - count of cells in z y x plane + * + * @warning - The ordering of is lexigraphical. + */ + Container<1 << Dimension, Index> dimensionMap; + /** + * @brief - A cumulative map over dimensions. + */ + Container cumulativeDimensionMap; + /** + * @brief - Fills dimensions map for N-dimensional Grid. + * + * @complexity - O(2 ^ Dimension) + */ + void refreshDimensionMaps() noexcept { + std::array combinationBuffer = {}; + std::size_t j = 0; + + for (std::size_t i = 0; i < Dimension + 1; i++) + cumulativeDimensionMap[i] = 0; + + for (std::size_t i = 0; i <= Dimension; i++) { + std::fill(combinationBuffer.begin(), combinationBuffer.end(), false); + std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true); + + do { + int result = 1; + + for (std::size_t k = 0; k < combinationBuffer.size(); k++) + result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; + + dimensionMap[j] = result; + cumulativeDimensionMap[i] += result; + + j++; + } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end())); + } + } + + __cuda_callable__ inline + GridEntity makeEntitity(const Index index, + const Container& traverseRectOrigin, + const Container& traverseRectDimensions, + const Container& traverseRectDimensionsProducts, + const Container& dimensionsProducts) const { + const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); + Container globalCoordinates = 0; + + for (Index i = 0; i < Dimension; i++) + globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i]; + + const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts); + + return GridEntity(globalIndex, globalCoordinates, {}); + } + /** + * Calculates position in the specific boundaries + */ + __cuda_callable__ inline + Container getCoordinates(const Index index, const Container &dimensions) const { + Container coordinates = 0; + Index tmpIndex = index; + + Index dim = Dimension - 1; + + // TODO: - Implement overflow check. + while (tmpIndex) { + Index dimension = dimensions[dim], + quotient = tmpIndex / dimension, + reminder = tmpIndex - (dimension * quotient); + + coordinates[dim] = reminder; + tmpIndex = quotient; + + dim -= 1; + } + + return coordinates; + } + /** + * Calculates product matrix based on the dimension + */ + __cuda_callable__ inline + Container getDimensionProducts(const Container& dimensions) const noexcept { + Container products = 0; + + for (Index i = Dimension; i > 0; i--) + products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; + + return products; + } + /** + * Calculates index based on the dimension + */ + __cuda_callable__ inline + Index getIndex(const Container& coordinates, + const Container& dimensionProducts) const { + Index index = 0; + + for (Index i = 0; i < Dimension; i++) + index += coordinates[i] * dimensionProducts[i]; + + return index; + } + +}; + +template +template +bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const { + Grid<2, int, Device> grid; + + grid.setDimensions(params.xSize, params.ySize); + + const Real hx = params.xDomainSize / (Real)grid.getDimension(0); + const Real hy = params.yDomainSize / (Real)grid.getDimension(1); + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); + + auto entitiesCount = grid.getEntitiesCount(0); + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + + TNL::Containers::Array ux(entitiesCount), // data at step u + aux(entitiesCount);// data at step u + 1 + + auto uxView = ux.getView(), auxView = aux.getView(); + + // Invalidate ux/aux + ux = 0; + aux = 0; + + auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { + auto position = entity.getCoordinates(); + auto index = entity.getIndex(); + + auto x = position[0] * hx - params.xDomainSize / 2; + auto y = position[1] * hx - params.yDomainSize / 2; + + uxView[index] = exp(params.sigma * (x * x + y * y)); + }; + + const Container<2, bool> direction{ false, false }; + + grid.traverse({ 1, 1 }, + { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, + { direction }, + init); + + if (!writeGNUPlot("data.txt", params, ux)) + return false; + + auto xDimension = grid.getDimension(0); + + auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { + auto index = entity.getIndex(); + + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv; + }; + + auto update = [=] __cuda_callable__(int i) mutable { + uxView[i] += auxView[i] * timestep; + }; + + Real start = 0; + + while (start < params.finalTime) { + grid.traverse({ 1, 1 }, + { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, + { direction }, + next); + + grid.traverseAll(update); + + start += timestep; + } + + return writeGNUPlot("data_final.txt", params, ux); +}; diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h index fbd536f8f..e6671e2e5 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h @@ -7,418 +7,7 @@ #include #include "../Base/HeatmapSolver.h" - -#include -#include -#include -#include -#include - -#pragma once - -template struct bool_pack {}; - -template -using conjunction = std::is_same, bool_pack>; - -template 0)>, - typename = std::enable_if_t::value>> -using Container = TNL::Containers::StaticArray; - -template 0)>, - typename = std::enable_if_t::value>> -class GridEntity { - public: - __cuda_callable__ - inline explicit GridEntity(const Index& vertexIndex, - const Container& coordinates, - const Container& direction) : - vertexIndex(vertexIndex), - coordinates(coordinates), - direction(direction) {}; - - __cuda_callable__ - ~GridEntity() {}; - - __cuda_callable__ - inline Container getCoordinates() const noexcept { - return coordinates; - } - - __cuda_callable__ - inline Index getIndex() const noexcept { - return vertexIndex; - } - private: - const Index vertexIndex; - const Container coordinates; - const Container direction; -}; - -template 0)>> -class Grid { - public: - Grid() {} - ~Grid() {} - - /** - * @brief - Specifies dimensions of the grid - * @param[in] dimensions - A parameter pack, which specifies points count in the specific dimension. - * Most significant dimension is in the beginning of the list. - * Least significant dimension is in the end of the list - */ - template ::value...>::value>, - typename = std::enable_if_t> - void setDimensions(Dimensions... dimensions) noexcept { - Index i = 0; - - for (auto x: { dimensions... }) { - TNL_ASSERT_GT(x, 0, "Dimension must be positive"); - this -> dimensions[i] = x; - i++; - } - - refreshDimensionMaps(); - } - /** - * @param[in] index - index of dimension - */ - inline __cuda_callable__ Index getDimension(Index index) const noexcept { - TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); - TNL_ASSERT_LT(index, Dimension, "Index must be less than Dimension"); - - return dimensions[index]; - } - /** - * @param[in] indices - A dimension index pack - */ - template ::value...>::value>, - typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getDimensions(DimensionIndex... indices) const noexcept { - Container result{indices...}; - - for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) - result[i] = this -> getDimension(result[i]); - - return result; - } - /** - * @param[in] index - index of dimension - */ - inline __cuda_callable__ Index getEntitiesCount(Index index) const noexcept { - TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); - TNL_ASSERT_LE(index, Dimension, "Index must be less than or equal to Dimension"); - - return cumulativeDimensionMap(index); - } - /** - * @brief - Returns the number of entities of specific dimension - */ - template ::value...>::value>, - typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getEntitiesCounts(DimensionIndex... indices) const noexcept { - Container result{indices...}; - - for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) - result[i] = this -> getEntitiesCount(result[i]); - - return result; - } - /** - * @param[in] index - index of dimension - */ - inline __cuda_callable__ Index getEndIndex(Index index) const noexcept { - TNL_ASSERT_GE(index, 0, "Index must be greater than zero"); - TNL_ASSERT_LT(index, Dimension, "Index must be less than or equal to Dimension"); - - return this -> getDimension(index) - 1; - } - /** - * @brief - Returns the last index of specific dimensions - */ - template ::value...>::value>, - typename = std::enable_if_t<(sizeof...(DimensionIndex) > 0)>> - Container getEndIndices(DimensionIndex... indices) const noexcept { - Container result{ indices... }; - - for (std::size_t i = 0; i < sizeof...(DimensionIndex); i++) - result[i] = this->getEndIndex(result[i]); - - return result; - } - /** - * @brief - Traversers all elements in the grid - */ - template - void traverseAll(Function function, FunctionArgs... args) const noexcept { - auto lambda = [=] __cuda_callable__(const Index index, FunctionArgs... args) mutable { - function(index, args...); - }; - - TNL::Algorithms::ParallelFor::exec(0, cumulativeDimensionMap[0], lambda, args...); - } - /** - * @brief - Traverses a grid from start index to end index. - * - * @param[in] start - a start index of point - * @param[in] end - an end index of point - * @param[in] directions - A pack of boolean vector flags with the size of the dimension. - * For example, let's have the 3-dimensional grid. - * A pack {false, false, false} will call function for all points - * A pack {true, false, false} will call function for edges directed over dimension at index of true - * A pack {true, true, false} will call function for faces directed over dimension at index of true - * A pack {true, true, true} will call function for cells directed over - * - */ - template - void traverse(const Container& firstVertex, - const Container& secondVertex, - const TNL::Containers::Array, Device>& directions, - Function function, - FunctionArgs... args) const noexcept { - Index verticesCount = 1; - Container traverseRectOrigin, traverseRectDimensions, - dimensionsProducts, traverseRectDimensionsProducts; - - for (Index i = 0; i < Dimension; i++) { - traverseRectDimensions[i] = abs(secondVertex[i] - firstVertex[i]) + 1; - verticesCount *= traverseRectDimensions[i]; - traverseRectOrigin[i] = std::min(firstVertex[i], secondVertex[i]); - - TNL_ASSERT_LT(firstVertex[i], dimensions[i], "End index must be in dimensions range"); - TNL_ASSERT_LT(secondVertex[i], dimensions[i], "Start index must be in dimensions range"); - } - - dimensionsProducts = getDimensionProducts(dimensions); - traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); - - auto outerFunction = [=] __cuda_callable__(Index offset, - const Container& traverseRectOrigin, - const Container& traverseRectDimensions, - const Container& traverseRectDimensionsProducts, - const Container& dimensionsProducts, - FunctionArgs... args) mutable { - auto entity = this -> makeEntitity(offset, - traverseRectOrigin, traverseRectDimensions, - traverseRectDimensionsProducts, dimensionsProducts); - - function(entity, args...); - }; - - Index lowerBound = 0, upperBound = verticesCount; - - for (Index i = 0; i < directions.getSize(); i++) { - TNL::Algorithms::ParallelFor::exec(lowerBound, upperBound, outerFunction, - traverseRectOrigin, - traverseRectDimensions, - traverseRectDimensionsProducts, - dimensionsProducts, - args...); - } - } - private: - Container dimensions; - /** - * @brief - A dimension map is a store for dimension limits over all combinations of basis. - * First, (n choose 0) elements will contain the count of 0 dimension elements - * Second, (n choose 1) elements will contain the count of 1-dimension elements - * .... - * - * For example, let's have a 3-d grid, then the map indexing will be the next: - * 0 - 0 - count of vertices - * 1, 2, 3 - count of edges in z, y, x plane - * 4, 5, 6 - count of faces in yz, xz, xy plane - * 7 - count of cells in z y x plane - * - * @warning - The ordering of is lexigraphical. - */ - Container<1 << Dimension, Index> dimensionMap; - /** - * @brief - A cumulative map over dimensions. - */ - Container cumulativeDimensionMap; - /** - * @brief - Fills dimensions map for N-dimensional Grid. - * - * @complexity - O(2 ^ Dimension) - */ - void refreshDimensionMaps() noexcept { - std::array combinationBuffer = {}; - std::size_t j = 0; - - for (std::size_t i = 0; i < Dimension + 1; i++) - cumulativeDimensionMap[i] = 0; - - for (std::size_t i = 0; i <= Dimension; i++) { - std::fill(combinationBuffer.begin(), combinationBuffer.end(), false); - std::fill(combinationBuffer.end() - i, combinationBuffer.end(), true); - - do { - int result = 1; - - for (std::size_t k = 0; k < combinationBuffer.size(); k++) - result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; - - dimensionMap[j] = result; - cumulativeDimensionMap[i] += result; - - j++; - } while (std::next_permutation(combinationBuffer.begin(), combinationBuffer.end())); - } - } - - __cuda_callable__ inline - GridEntity makeEntitity(const Index index, - const Container& traverseRectOrigin, - const Container& traverseRectDimensions, - const Container& traverseRectDimensionsProducts, - const Container& dimensionsProducts) const { - const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); - Container globalCoordinates = 0; - - for (Index i = 0; i < Dimension; i++) - globalCoordinates[i] = traverseRectOrigin[i] + traverseCoordinates[i]; - - const auto globalIndex = getIndex(globalCoordinates, dimensionsProducts); - - return GridEntity(globalIndex, globalCoordinates, {}); - } - /** - * Calculates position in the specific boundaries - */ - __cuda_callable__ inline - Container getCoordinates(const Index index, const Container &dimensions) const { - Container coordinates = 0; - Index tmpIndex = index; - - Index dim = Dimension - 1; - - // TODO: - Implement overflow check. - while (tmpIndex) { - Index dimension = dimensions[dim], - quotient = tmpIndex / dimension, - reminder = tmpIndex - (dimension * quotient); - - coordinates[dim] = reminder; - tmpIndex = quotient; - - dim -= 1; - } - - return coordinates; - } - /** - * Calculates product matrix based on the dimension - */ - __cuda_callable__ inline - Container getDimensionProducts(const Container& dimensions) const noexcept { - Container products = 0; - - for (Index i = Dimension; i > 0; i--) - products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; - - return products; - } - /** - * Calculates index based on the dimension - */ - __cuda_callable__ inline - Index getIndex(const Container& coordinates, - const Container& dimensionProducts) const { - Index index = 0; - - for (Index i = 0; i < Dimension; i++) - index += coordinates[i] * dimensionProducts[i]; - - return index; - } - -}; - -template -template -bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) const { - Grid<2, int, Device> grid; - - grid.setDimensions(params.xSize, params.ySize); - - const Real hx = params.xDomainSize / (Real)grid.getDimension(0); - const Real hy = params.yDomainSize / (Real)grid.getDimension(1); - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); - - auto entitiesCount = grid.getEntitiesCount(0); - auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); - - TNL::Containers::Array ux(entitiesCount), // data at step u - aux(entitiesCount);// data at step u + 1 - - auto uxView = ux.getView(), auxView = aux.getView(); - - // Invalidate ux/aux - ux = 0; - aux = 0; - - auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { - auto position = entity.getCoordinates(); - auto index = entity.getIndex(); - - auto x = position[0] * hx - params.xDomainSize / 2; - auto y = position[1] * hx - params.yDomainSize / 2; - - uxView[index] = exp(params.sigma * (x * x + y * y)); - }; - - const Container<2, bool> direction{ false, false }; - - grid.traverse({ 1, 1 }, - { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, - { direction }, - init); - - if (!writeGNUPlot("data.txt", params, ux)) - return false; - - auto xDimension = grid.getDimension(0); - - auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { - auto index = entity.getIndex(); - - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv; - }; - - auto update = [=] __cuda_callable__(int i) mutable { - uxView[i] += auxView[i] * timestep; - }; - - Real start = 0; - - while (start < params.finalTime) { - grid.traverse({ 1, 1 }, - { grid.getEndIndex(0) - 1, grid.getEndIndex(1) - 1 }, - { direction }, - next); - - grid.traverseAll(update); - - start += timestep; - } - - return writeGNUPlot("data_final.txt", params, ux); -}; - +#include "implementation.h" int main(int argc, char* argv[]) { using Real = double; -- GitLab From 9a1e341479f35040ab7863f46494c2bd21772447 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Tue, 30 Nov 2021 15:17:35 +0100 Subject: [PATCH 25/29] Implement benchmark for NDim grid --- .../HeatEquationGrid/Base/HeatmapSolver.h | 2 +- .../Base/HeatmapSolverBenchmark.h | 115 ++++++++++-------- .../HeatmapNDimGrid/CMakeLists.txt | 7 +- .../HeatmapNDimGrid/benchmark/CMakeLists.txt | 5 + .../HeatmapNDimGrid/{ => benchmark}/main.cpp | 0 .../HeatmapNDimGrid/{ => benchmark}/main.cu | 0 .../HeatmapNDimGrid/benchmark/main.h | 6 + .../HeatmapNDimGrid/solution/CMakeLists.txt | 5 + .../HeatmapNDimGrid/solution/main.cpp | 2 + .../HeatmapNDimGrid/solution/main.cu | 2 + .../HeatmapNDimGrid/{ => solution}/main.h | 26 +--- 11 files changed, 92 insertions(+), 78 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => benchmark}/main.cpp (100%) rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => benchmark}/main.cu (100%) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu rename src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/{ => solution}/main.h (62%) diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h index 6f3411268..6974e06bc 100644 --- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h @@ -83,7 +83,7 @@ HeatmapSolver::Parameters::Parameters(const TNL::Config::ParameterContaine sigma(parameters.getParameter("sigma")), timeStep(parameters.getParameter("time-step")), finalTime(parameters.getParameter("final-time")), - outputData(parameters.getParameter("outputData")) + outputData(parameters.getParameter("outputData")), verbose(parameters.getParameter("verbose")) {} template diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h index 2d7fd0504..6e9539f2a 100644 --- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h @@ -8,20 +8,21 @@ #include "HeatmapSolver.h" +#pragma once + class HeatmapSolverBenchmark { public: template - void exec(const HeatmapSolver::Parameters& params) const; - - template< typename Real > - void runBenchmark(const Benchmark& benchmark, - const Benchmark::MetadataMap, - const int minXDimension, - const int maxXDimension, - const int xStepSizeFactor, - const int minYDimension, - const int maxYDimension, - const int yStepSizeFactor, + void exec(const typename HeatmapSolver::Parameters& params) const; + + template + void runBenchmark(TNL::Benchmarks::Benchmark<> & benchmark, + const std::size_t minXDimension, + const std::size_t maxXDimension, + const std::size_t xStepSizeFactor, + const std::size_t minYDimension, + const std::size_t maxYDimension, + const std::size_t yStepSizeFactor, const TNL::Config::ParameterContainer& parameters) const; static TNL::Config::ConfigDescription makeInputConfig(); @@ -42,11 +43,11 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { config.addEntryEnum("all"); config.addEntry("min-x-dimension", "Minimum dimension over x axis used in the benchmark.", 100); - config.addEntry("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 10000); + config.addEntry("max-x-dimension", "Maximum dimension over x axis used in the benchmark.", 200); config.addEntry("x-size-step-factor", "Factor determining the dimension grows over x axis. First size is min-x-dimension and each following size is stepFactor*previousSize, up to max-x-dimension.", 2); config.addEntry("min-y-dimension", "Minimum dimension over x axis used in the benchmark.", 100); - config.addEntry("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 10000); + config.addEntry("max-y-dimension", "Maximum dimension over x axis used in the benchmark.", 200); config.addEntry("y-size-step-factor", "Factor determining the dimension grows over y axis. First size is min-y-dimension and each following size is stepFactor*previousSize, up to max-y-dimension.", 2); config.addEntry("loops", "Number of iterations for every computation.", 10); @@ -65,48 +66,54 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { config.addDelimiter("Device settings:"); TNL::Devices::Host::configSetup( config ); TNL::Devices::Cuda::configSetup( config ); -} - -template -void HeatmapSolverBenchmark::runBenchmark(Benchmark& benchmark, - Benchmark::MetadataMap metadata, - const int minXDimension, - const int maxXDimension, - const int xStepSizeFactor, - const int minYDimension, - const int maxYDimension, - const int yStepSizeFactor, - const TNL::Config::ParameterContainer& parameters) const { - const TNL::String precision = getType< Real >(); - metadata["precision"] = precision; - - benchmark.newBenchmark(String("Heatmap grid with (") + precision + ", host allocator" + Device + ")"); - - auto xDomainSize = parameters.getParameter("domain-x-size"); - auto yDomainSize = parameters.getParameter("domain-y-size"); - auto sigma = parameters.getParameter("sigma"); + return config; +} - auto timeStep = parameters.getParameter("time-step"); +template +void HeatmapSolverBenchmark::exec(const typename HeatmapSolver::Parameters& params) const { + HeatmapSolver solver; - auto finalTime = parameters.getParameter("final-time"); + auto result = solver.template solve(params); + if (!result) + std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl; +} - for(std::size_t xSize = minXDimension; xSize <= maxXDimension; size *= xStepSizeFactor) { - for(std::size_t ySize = minXDimension; ySize <= maxXDimension; size *= yStepSizeFactor) { - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "xSize", convertToString(xSize) }, - { "ySize", convertToString(ySize) } +template +void HeatmapSolverBenchmark::runBenchmark(TNL::Benchmarks::Benchmark<>& benchmark, + const std::size_t minXDimension, + const std::size_t maxXDimension, + const std::size_t xStepSizeFactor, + const std::size_t minYDimension, + const std::size_t maxYDimension, + const std::size_t yStepSizeFactor, + const TNL::Config::ParameterContainer& parameters) const { + Real xDomainSize = parameters.getParameter("domain-x-size"); + Real yDomainSize = parameters.getParameter("domain-y-size"); + Real sigma = parameters.getParameter("sigma"); + Real timeStep = parameters.getParameter("time-step"); + Real finalTime = parameters.getParameter("final-time"); + + auto precision = TNL::getType(), device = TNL::getType(); + + std::cout << "Heatmap grid with (" + precision + ", host allocator " + device + ")" << std::endl; + + for(std::size_t xSize = minXDimension; xSize <= maxXDimension; xSize *= xStepSizeFactor) { + for(std::size_t ySize = minXDimension; ySize <= maxXDimension; ySize *= yStepSizeFactor) { + benchmark.setMetadataColumns( TNL::Benchmarks::Benchmark<>::MetadataColumns({ + { "precision", precision }, + { "xSize", TNL::convertToString(xSize) }, + { "ySize", TNL::convertToString(ySize) } })); auto lambda = [=]() { - HeatmapSolver::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false); + typename HeatmapSolver::Parameters params(xSize, ySize, xDomainSize, yDomainSize, sigma, timeStep, finalTime, false, false); - exec(params); + exec(params); }; - benchmark.time() + benchmark.time("_", lambda); } } } @@ -156,17 +163,23 @@ int main(int argc, char* argv[]) { std::ofstream logFile( logFileName.getString(), mode ); - TNL::Benchmarks::Benchmark benchmark(loops, verbose); + TNL::Benchmarks::Benchmark<> benchmark(logFile, loops, verbose); + + // write global metadata into a separate file + std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); + TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); if( precision == "all" || precision == "float" ) - solver.runBenchmark(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters); + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); if( precision == "all" || precision == "double" ) - solver.runBenchmark(benchmark, metadata, minXDimension, maxXDimension, xSizeStepFactor, minXDimension, maxYDimension, ySizeStepFactor, parameters); + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); - if(!benchmark.save(logFile)) { - std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl; - return EXIT_FAILURE; - } +#ifdef HAVE_CUDA + if( precision == "all" || precision == "float" ) + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + if( precision == "all" || precision == "double" ) + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); +#endif return EXIT_SUCCESS; } diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt index d493b57b4..58c97ef0d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/CMakeLists.txt @@ -1,5 +1,2 @@ -if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cu ) -ELSE() - add_executable( benchmark_heat_n_dim_grid implementation.h ../Base/HeatmapSolver.h main.h main.cpp ) -ENDIF() +ADD_SUBDIRECTORY( benchmark ) +ADD_SUBDIRECTORY( solution ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt new file mode 100644 index 000000000..d95ce1fa7 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) +ELSE() + add_executable( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cpp similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cpp rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cpp diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cu similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.cu rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.cu diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h new file mode 100644 index 000000000..24eb97da8 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/main.h @@ -0,0 +1,6 @@ + +#include "../../Base/HeatmapSolver.h" +#include "../../Base/HeatmapSolverBenchmark.h" +#include "../implementation.h" + +#pragma once diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt new file mode 100644 index 000000000..7319b2aca --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) +ELSE() + add_executable( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cpp @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h similarity index 62% rename from src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h rename to src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h index e6671e2e5..54d0c86f0 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/main.h @@ -6,8 +6,8 @@ #include #include -#include "../Base/HeatmapSolver.h" -#include "implementation.h" +#include "../../Base/HeatmapSolver.h" +#include "../implementation.h" int main(int argc, char* argv[]) { using Real = double; @@ -19,6 +19,9 @@ int main(int argc, char* argv[]) { return EXIT_FAILURE; auto device = parameters.getParameter("device"); + + parameters.addParameter("outputData", true); + auto params = HeatmapSolver::Parameters(parameters); HeatmapSolver solver; @@ -33,22 +36,3 @@ int main(int argc, char* argv[]) { return EXIT_SUCCESS; } - - -/* -int main(int argc, char *argv[]) { - Grid<2, int, TNL::Devices::Cuda> grid; - - grid.setDimensions(5, 5); - - auto fn_entity = [=] __cuda_callable__ (GridEntity<2, int> entity) { - printf("%d %d %d\n", entity.getCoordinates()[0], entity.getCoordinates()[1], entity.getIndex()); - }; - - Container<2, int> direction { 0, 0 }; - - grid.traverse({ 1, 1 }, { 4, 4 }, { direction }, fn_entity); - - return 0; -} -*/ -- GitLab From f1d13b09ad1ca00fbdbf68f7acfab4c979f61c7b Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 2 Dec 2021 09:26:12 +0100 Subject: [PATCH 26/29] Refactor parallel for solution to use benchmarks --- .../HeatmapNDimGrid/benchmark/CMakeLists.txt | 4 +-- .../HeatmapNDimGrid/solution/CMakeLists.txt | 4 +-- .../HeatmapParallelFor/CMakeLists.txt | 7 ++-- .../benchmark/CMakeLists.txt | 5 +++ .../{ => benchmark}/main.cpp | 0 .../{ => benchmark}/main.cu | 0 .../HeatmapParallelFor/benchmark/main.h | 5 +++ .../{main.h => implementation.h} | 28 --------------- .../solution/CMakeLists.txt | 5 +++ .../HeatmapParallelFor/solution/main.cpp | 2 ++ .../HeatmapParallelFor/solution/main.cu | 2 ++ .../HeatmapParallelFor/solution/main.h | 35 +++++++++++++++++++ 12 files changed, 60 insertions(+), 37 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{ => benchmark}/main.cpp (100%) rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{ => benchmark}/main.cu (100%) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h rename src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/{main.h => implementation.h} (76%) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt index d95ce1fa7..b52b3909d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/benchmark/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) + CUDA_ADD_EXECUTABLE( heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) ELSE() - add_executable( tnl_heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) + add_executable( heat_n_dim_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt index 7319b2aca..5ab54f05a 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/solution/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) + CUDA_ADD_EXECUTABLE( heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) ELSE() - add_executable( tnl_heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) + add_executable( heat_n_dim_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt index 51ee69d15..58c97ef0d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/CMakeLists.txt @@ -1,5 +1,2 @@ -if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cu ) -ELSE() - add_executable( benchmark_heat_parallel_for ../Base/HeatmapSolver.h main.h main.cpp ) -ENDIF() +ADD_SUBDIRECTORY( benchmark ) +ADD_SUBDIRECTORY( solution ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt new file mode 100644 index 000000000..364b4a99f --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( heat_parallel_for_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) +ELSE() + add_executable( heat_parallel_for_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cpp similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cpp rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cpp diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cu similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.cu rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.cu diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h new file mode 100644 index 000000000..64defc7d1 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/benchmark/main.h @@ -0,0 +1,5 @@ +#include "../../Base/HeatmapSolver.h" +#include "../../Base/HeatmapSolverBenchmark.h" +#include "../implementation.h" + +#pragma once diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h similarity index 76% rename from src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h rename to src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h index 08658ea14..c8ad25eb9 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h @@ -1,7 +1,4 @@ -#include -#include -#include #include #include @@ -89,28 +86,3 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c return writeGNUPlot("data_final.txt", params, ux); } - -int main(int argc, char *argv[]) { - using Real = double; - - auto config = HeatmapSolver::Parameters::makeInputConfig(); - - TNL::Config::ParameterContainer parameters; - if (!parseCommandLine(argc, argv, config, parameters)) - return EXIT_FAILURE; - - auto device = parameters.getParameter("device"); - auto params = HeatmapSolver::Parameters(parameters); - - HeatmapSolver solver; - - if (device == "host" && !solver.solve(params)) - return EXIT_FAILURE; - -#ifdef HAVE_CUDA - if (device == "cuda" && !solver.solve(params)) - return EXIT_FAILURE; -#endif - - return EXIT_SUCCESS; -} diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt new file mode 100644 index 000000000..b134a4b22 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) +ELSE() + add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cpp @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h new file mode 100644 index 000000000..c3192eb13 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h @@ -0,0 +1,35 @@ + +#include +#include +#include +#include +#include +#include + +#include "../../Base/HeatmapSolver.h" +#include "../implementation.h" + +int main(int argc, char *argv[]) { + using Real = double; + + auto config = HeatmapSolver::Parameters::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); + + HeatmapSolver solver; + + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; + +#ifdef HAVE_CUDA + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; +#endif + + return EXIT_SUCCESS; +} -- GitLab From 50f94102549b3712417c47a4a6d176dee57e6ff3 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 2 Dec 2021 15:51:40 +0100 Subject: [PATCH 27/29] Implement benchmarks for TNL Grid/ParallelFor --- .../HeatEquationGrid/Base/HeatmapSolver.h | 2 +- .../Base/HeatmapSolverBenchmark.h | 31 +++- .../HeatmapNDimGrid/implementation.h | 40 +++-- .../solution/CMakeLists.txt | 4 +- .../HeatmapParallelFor/solution/main.h | 6 +- .../HeatmapTNLGrid/CMakeLists.txt | 7 +- .../HeatmapTNLGrid/benchmark/CMakeLists.txt | 5 + .../HeatmapTNLGrid/{ => benchmark}/main.cpp | 0 .../HeatmapTNLGrid/{ => benchmark}/main.cu | 0 .../HeatmapTNLGrid/benchmark/main.h | 5 + .../HeatmapTNLGrid/implementation.h | 77 ++++++++ .../HeatEquationGrid/HeatmapTNLGrid/main.h | 168 ------------------ .../HeatmapTNLGrid/solution/CMakeLists.txt | 5 + .../HeatmapTNLGrid/solution/main.cpp | 2 + .../HeatmapTNLGrid/solution/main.cu | 2 + .../HeatmapTNLGrid/solution/main.h | 37 ++++ 16 files changed, 186 insertions(+), 205 deletions(-) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt rename src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/{ => benchmark}/main.cpp (100%) rename src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/{ => benchmark}/main.cu (100%) create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h delete mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu create mode 100644 src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h index 6974e06bc..9adfef757 100644 --- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h @@ -54,7 +54,7 @@ TNL::Config::ConfigDescription HeatmapSolver::Parameters::makeInputConfig( { TNL::Config::ConfigDescription config; - config.addEntry("device", "Device the computation will run on.", "host"); + config.addEntry("device", "Device the computation will run on.", "cuda"); config.addEntryEnum("host"); #ifdef HAVE_CUDA diff --git a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h index 6e9539f2a..a8b507aa4 100644 --- a/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h +++ b/src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h @@ -37,6 +37,13 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { config.addEntryEnum("append"); config.addEntryEnum("overwrite"); + config.addEntry("device", "Device the computation will run on.", "cuda"); + config.addEntryEnum("host"); + +#ifdef HAVE_CUDA + config.addEntryEnum("cuda"); +#endif + config.addEntry("precision", "Precision of the arithmetics.", "double"); config.addEntryEnum("float"); config.addEntryEnum("double"); @@ -77,7 +84,7 @@ void HeatmapSolverBenchmark::exec(const typename HeatmapSolver::Parameters auto result = solver.template solve(params); if (!result) - std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl; + printf("Fail to solve for grid size (%d,%d)", params.xSize, params.ySize); } template @@ -169,16 +176,22 @@ int main(int argc, char* argv[]) { std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); - if( precision == "all" || precision == "float" ) - solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); - if( precision == "all" || precision == "double" ) - solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + auto device = parameters.getParameter("device"); + + if (device == "host") { + if(precision == "all" || precision == "float") + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + if(precision == "all" || precision == "double") + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + } #ifdef HAVE_CUDA - if( precision == "all" || precision == "float" ) - solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); - if( precision == "all" || precision == "double" ) - solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + if (device == "cuda") { + if( precision == "all" || precision == "float" ) + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + if( precision == "all" || precision == "double" ) + solver.runBenchmark(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); + } #endif return EXIT_SUCCESS; diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h index ed8ea004d..ce9361fac 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h @@ -204,10 +204,10 @@ class Grid { traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); auto outerFunction = [=] __cuda_callable__(Index offset, - const Container& traverseRectOrigin, - const Container& traverseRectDimensions, - const Container& traverseRectDimensionsProducts, - const Container& dimensionsProducts, + const Container traverseRectOrigin, + const Container traverseRectDimensions, + const Container traverseRectDimensionsProducts, + const Container dimensionsProducts, FunctionArgs... args) mutable { auto entity = this -> makeEntitity(offset, traverseRectOrigin, traverseRectDimensions, @@ -237,8 +237,8 @@ class Grid { * * For example, let's have a 3-d grid, then the map indexing will be the next: * 0 - 0 - count of vertices - * 1, 2, 3 - count of edges in z, y, x plane - * 4, 5, 6 - count of faces in yz, xz, xy plane + * 1, 2, 3 - count of edges in x, y, z plane + * 4, 5, 6 - count of faces in xy, yz, zy plane * 7 - count of cells in z y x plane * * @warning - The ordering of is lexigraphical. @@ -268,7 +268,7 @@ class Grid { int result = 1; for (std::size_t k = 0; k < combinationBuffer.size(); k++) - result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; + result *= combinationBuffer[k] ? dimensions[Dimension - k - 1] - 1 : dimensions[Dimension - k - 1]; dimensionMap[j] = result; cumulativeDimensionMap[i] += result; @@ -279,12 +279,13 @@ class Grid { } __cuda_callable__ inline - GridEntity makeEntitity(const Index index, + GridEntity makeEntitity(const Index& index, const Container& traverseRectOrigin, const Container& traverseRectDimensions, const Container& traverseRectDimensionsProducts, const Container& dimensionsProducts) const { - const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); + //Container traverseCoordinates = 0; + Container traverseCoordinates = getCoordinates(index, traverseRectDimensions); Container globalCoordinates = 0; for (Index i = 0; i < Dimension; i++) @@ -298,22 +299,23 @@ class Grid { * Calculates position in the specific boundaries */ __cuda_callable__ inline - Container getCoordinates(const Index index, const Container &dimensions) const { + Container getCoordinates(const Index& index, + const Container &dimensions) const { Container coordinates = 0; Index tmpIndex = index; - Index dim = Dimension - 1; + Index dimensionIndex = 0; // TODO: - Implement overflow check. - while (tmpIndex) { - Index dimension = dimensions[dim], + while (tmpIndex && dimensionIndex < Dimension) { + Index dimension = dimensions[dimensionIndex], quotient = tmpIndex / dimension, reminder = tmpIndex - (dimension * quotient); - coordinates[dim] = reminder; + coordinates[dimensionIndex] = reminder; tmpIndex = quotient; - dim -= 1; + dimensionIndex += 1; } return coordinates; @@ -325,8 +327,10 @@ class Grid { Container getDimensionProducts(const Container& dimensions) const noexcept { Container products = 0; - for (Index i = Dimension; i > 0; i--) - products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; + products[0] = 1; + + for (Index i = 1; i < Dimension; i++) + products[i] = dimensions[i - 1] * products[i - 1]; return products; } @@ -370,7 +374,7 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c ux = 0; aux = 0; - auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { + auto init = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { auto position = entity.getCoordinates(); auto index = entity.getIndex(); diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt index b134a4b22..8cc2bdf69 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt @@ -1,5 +1,5 @@ if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) + CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) ELSE() - add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) + add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h index c3192eb13..31c5ded3a 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h @@ -18,16 +18,18 @@ int main(int argc, char *argv[]) { if (!parseCommandLine(argc, argv, config, parameters)) return EXIT_FAILURE; + parameters.addParameter("outputData", true); + auto device = parameters.getParameter("device"); auto params = HeatmapSolver::Parameters(parameters); HeatmapSolver solver; - if (device == "host" && !solver.solve(params)) + if (device == "host" && !solver.template solve(params)) return EXIT_FAILURE; #ifdef HAVE_CUDA - if (device == "cuda" && !solver.solve(params)) + if (device == "cuda" && !solver.template solve(params)) return EXIT_FAILURE; #endif diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt index 5672634e3..58c97ef0d 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/CMakeLists.txt @@ -1,5 +1,2 @@ -if (BUILD_CUDA) - CUDA_ADD_EXECUTABLE( benchmark_heat_tnl_grid main.h main.cu ) -ELSE() - add_executable( benchmark_heat_tnl_grid main.h main.cpp ) -ENDIF() +ADD_SUBDIRECTORY( benchmark ) +ADD_SUBDIRECTORY( solution ) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt new file mode 100644 index 000000000..f8f5f4e5f --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( heat_tnl_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) +ELSE() + add_executable( heat_tnl_grid_benchmark ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cpp similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cpp rename to src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cpp diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cu similarity index 100% rename from src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.cu rename to src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.cu diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h new file mode 100644 index 000000000..64defc7d1 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/benchmark/main.h @@ -0,0 +1,5 @@ +#include "../../Base/HeatmapSolver.h" +#include "../../Base/HeatmapSolverBenchmark.h" +#include "../implementation.h" + +#pragma once diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h new file mode 100644 index 000000000..be4d1a13d --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h @@ -0,0 +1,77 @@ +#pragma once + +#include "TNL/Meshes/GridDetails/Grid2D.h" +#include "../Base/HeatmapSolver.h" + +template +template +bool HeatmapSolver::solve(const HeatmapSolver::Parameters& params) const { + using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>; + + Grid2D grid; + + // Grid implementation defines its dimensions in the amount of edges. + // To align it size to all other benchmarks substract 1 + grid.setDimensions(params.xSize - 1, params.ySize - 1); + + // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well + int verticesCount = grid.template getEntitiesCount<0>(); + + TNL::Containers::Array ux(verticesCount), // data at step u + aux(verticesCount);// data at step u + 1 + + // Invalidate ux/aux + ux = 0; + aux = 0; + + const Real hx = params.xDomainSize / (Real)params.xSize; + const Real hy = params.yDomainSize / (Real)params.ySize; + const Real hx_inv = 1 / (hx * hx); + const Real hy_inv = 1 / (hy * hy); + + auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + + auto uxView = ux.getView(), + auxView = aux.getView(); + + auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable { + auto index = entity.getIndex(); + + auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2; + auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2; + + uxView[index] = exp(params.sigma * (x * x + y * y)); + }; + + grid.template forInterior<0>(init); + + if (!writeGNUPlot("data.txt", params, ux)) + return false; + + auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { + auto index = entity.getIndex(); + auto width = grid.getDimensions().x() + 1; + + auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + + (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv; + }; + + auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { + auto index = entity.getIndex(); + + uxView[index] += auxView[index] * timestep; + }; + + Real start = 0; + + while (start < params.finalTime) { + grid.template forInterior<0>(next); + grid.template forInterior<0>(update); + + start += timestep; + } + + return writeGNUPlot("data_final.txt", params, ux); + + return false; +} diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h deleted file mode 100644 index 82b27213d..000000000 --- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/main.h +++ /dev/null @@ -1,168 +0,0 @@ - -#pragma once - -#include "TNL/Meshes/GridDetails/Grid2D.h" -#include "../Base/HeatmapSolver.h" - -template -template -bool HeatmapSolver::solve(const HeatmapSolver::Parameters& params) const { - using Grid2D = TNL::Meshes::Grid<2, Real, Device, int>; - - Grid2D grid; - - // Grid implementation defines its dimensions in the amount of edges. - // To align it size to all other benchmarks substract 1 - grid.setDimensions(params.xSize - 1, params.ySize - 1); - - // TODO: - Improve style of access. It is counterintuitive for person, who doesn't know C++ well - int verticesCount = grid.template getEntitiesCount<0>(); - - TNL::Containers::Array ux(verticesCount), // data at step u - aux(verticesCount);// data at step u + 1 - - // Invalidate ux/aux - ux = 0; - aux = 0; - - const Real hx = params.xDomainSize / (Real)params.xSize; - const Real hy = params.yDomainSize / (Real)params.ySize; - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); - - auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); - - auto uxView = ux.getView(), - auxView = aux.getView(); - - auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable { - auto index = entity.getIndex(); - - auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2; - auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2; - - uxView[index] = exp(params.sigma * (x * x + y * y)); - }; - - grid.template forInterior<0>(init); - - if (!writeGNUPlot("data.txt", params, ux)) - return false; - - auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { - auto index = entity.getIndex(); - auto width = grid.getDimensions().x() + 1; - - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv; - }; - - auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { - auto index = entity.getIndex(); - - uxView[index] += auxView[index] * timestep; - }; - - Real start = 0; - - while (start < params.finalTime) { - grid.template forInterior<0>(next); - grid.template forInterior<0>(update); - - start += timestep; - } - - return writeGNUPlot("data_final.txt", params, ux); - - return false; -} - -int main(int argc, char* argv[]) { - using Real = double; - - auto config = HeatmapSolver::Parameters::makeInputConfig(); - - TNL::Config::ParameterContainer parameters; - if (!parseCommandLine(argc, argv, config, parameters)) - return EXIT_FAILURE; - - auto device = parameters.getParameter("device"); - auto params = HeatmapSolver::Parameters(parameters); - - HeatmapSolver solver; - - if (device == "host" && !solver.solve(params)) - return EXIT_FAILURE; - -#ifdef HAVE_CUDA - if (device == "cuda" && !solver.solve(params)) - return EXIT_FAILURE; -#endif - - return EXIT_SUCCESS; -} - - -// TODO: - Move to tests -/* -using Grid1D = TNL::Meshes::Grid<1, float, TNL::Devices::Cuda, int>; -using Grid2D = TNL::Meshes::Grid<2, float, TNL::Devices::Cuda, int>; - -void check1D() { - Grid1D grid; - - grid.setDimensions({ 100 }); - - auto fn0 = [=] __cuda_callable__(const Grid1D::EntityType<0>&entity, int variant) mutable { - printf("%d %d\n", variant, entity.getCoordinates().x()); - }; - - auto fn1 = [=] __cuda_callable__(const Grid1D::EntityType<1>&entity, int variant) mutable { - printf("%d %d\n", variant, entity.getCoordinates().x()); - }; - - grid.forAll<0>(fn0, 0); - grid.forInterior<0>(fn0, 1); - grid.forBoundary<0>(fn0, 2); - - grid.forAll<1>(fn1, 3); - grid.forInterior<1>(fn1, 4); - grid.forBoundary<1>(fn1, 5); -} - -void check2D() { - Grid2D grid; - - grid.setDimensions({ 10, 10 }); - - auto fn0 = [=] __cuda_callable__(const Grid2D::EntityType<0>&entity, int variant) mutable { - printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); - }; - - auto fn1 = [=] __cuda_callable__(const Grid2D::EntityType<1>&entity, int variant) mutable { - printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); - }; - - auto fn2 = [=] __cuda_callable__(const Grid2D::EntityType<2>&entity, int variant) mutable { - printf("%d %d %d\n", variant, entity.getCoordinates().x(), entity.getCoordinates().y()); - }; - - std::cout << "Entities count of 0: dimension" << grid.getEntitiesCount<0>() << std::endl; - - grid.forAll<0>(fn0, 0); - grid.forInterior<0>(fn0, 1); - grid.forBoundary<0>(fn0, 2); - - grid.forAll<1>(fn1, 3); - grid.forInterior<1>(fn1, 4); - grid.forBoundary<1>(fn1, 5); - - grid.forAll<2>(fn2, 6); - grid.forInterior<2>(fn2, 7); - grid.forBoundary<2>(fn2, 8); -} - -int main(int argc, char *argv[]) { - return 0; -} -*/ diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt new file mode 100644 index 000000000..d5efbfc81 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/CMakeLists.txt @@ -0,0 +1,5 @@ +if (BUILD_CUDA) + CUDA_ADD_EXECUTABLE( heat_tnl_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) +ELSE() + add_executable( heat_tnl_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) +ENDIF() diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cpp @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu new file mode 100644 index 000000000..7a7a28956 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.cu @@ -0,0 +1,2 @@ + +#include "main.h" diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h new file mode 100644 index 000000000..6d888d753 --- /dev/null +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/solution/main.h @@ -0,0 +1,37 @@ + +#include +#include +#include +#include +#include +#include + +#include "../../Base/HeatmapSolver.h" +#include "../implementation.h" + +int main(int argc, char *argv[]) { + using Real = double; + + auto config = HeatmapSolver::Parameters::makeInputConfig(); + + TNL::Config::ParameterContainer parameters; + if (!parseCommandLine(argc, argv, config, parameters)) + return EXIT_FAILURE; + + parameters.addParameter("outputData", true); + + auto device = parameters.getParameter("device"); + auto params = HeatmapSolver::Parameters(parameters); + + HeatmapSolver solver; + + if (device == "host" && !solver.solve(params)) + return EXIT_FAILURE; + +#ifdef HAVE_CUDA + if (device == "cuda" && !solver.solve(params)) + return EXIT_FAILURE; +#endif + + return EXIT_SUCCESS; +} -- GitLab From 66648e7333f2ffc74ab699700de32e327e012f83 Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 9 Dec 2021 14:27:41 +0100 Subject: [PATCH 28/29] Remove basis from the GridEntity --- src/Benchmarks/CMakeLists.txt | 2 +- src/TNL/Meshes/GridDetails/Grid2D_impl.h | 19 ++--- src/TNL/Meshes/GridDetails/GridEntity_impl.h | 73 ++------------------ src/TNL/Meshes/GridEntity.h | 32 +-------- 4 files changed, 13 insertions(+), 113 deletions(-) diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt index 6fa019081..985f4f162 100644 --- a/src/Benchmarks/CMakeLists.txt +++ b/src/Benchmarks/CMakeLists.txt @@ -1,4 +1,4 @@ -add_subdirectory( HeatEquation ) +#add_subdirectory( HeatEquation ) add_subdirectory( HeatEquationGrid ) add_subdirectory( BLAS ) add_subdirectory( NDArray ) diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index 65da95107..d10c63811 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -438,9 +438,8 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { auto outerOriented = [=] __cuda_callable__(Index i, Index j, const Grid<2, Real, Device, Index>&grid, const CoordinatesType & orientation, - const CoordinatesType & basis, FuncArgs... args) mutable { - EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + EntityType entity(grid, CoordinatesType(i, j), orientation); entity.refresh(); @@ -452,7 +451,6 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { outerOriented, *this, CoordinatesType(1, 0), - CoordinatesType(0, 1), args...); TNL::Algorithms::ParallelFor2D::exec(0, 0, @@ -460,7 +458,6 @@ void Grid<2, Real, Device, Index>::forAll(Func func, FuncArgs... args) const { outerOriented, *this, CoordinatesType(0, 1), - CoordinatesType(1, 0), args...); break; } @@ -498,9 +495,8 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons auto outerOriented = [=] __cuda_callable__(Index i, Index j, Grid<2, Real, Device, Index>& grid, const CoordinatesType& orientation, - const CoordinatesType& basis, FuncArgs... args) mutable { - EntityType entity(grid, CoordinatesType(i, j), orientation, basis); + EntityType entity(grid, CoordinatesType(i, j), orientation); entity.refresh(); @@ -512,7 +508,6 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons outerOriented, *this, CoordinatesType(1, 0), - CoordinatesType(0, 1), args...); TNL::Algorithms::ParallelFor2D::exec(0, 1, @@ -520,7 +515,6 @@ void Grid<2, Real, Device, Index>::forInterior(Func func, FuncArgs... args) cons outerOriented, *this, CoordinatesType(0, 1), - CoordinatesType(1, 0), args...); break; } @@ -576,8 +570,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons Index axis, Index axisIndex, const Grid<2, Real, Device, Index>& grid, - const CoordinatesType & orientation, - const CoordinatesType & basis, + const CoordinatesType& orientation, FuncArgs... args) mutable { CoordinatesType coordinates; @@ -593,7 +586,7 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons default: TNL_ASSERT_TRUE(false, "Received axis index. Expect in range [0..<1]"); } - EntityType entity(grid, coordinates, orientation, basis); + EntityType entity(grid, coordinates, orientation); entity.refresh(); @@ -607,7 +600,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons 0, 0, *this, CoordinatesType(1, 0), - CoordinatesType(0, 1), args...); // Upper horizontal TNL::Algorithms::ParallelFor::exec(0, @@ -616,7 +608,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons 0, dimensions.y(), *this, CoordinatesType(1, 0), - CoordinatesType(0, 1), args...); // Left vertical TNL::Algorithms::ParallelFor::exec(0, @@ -625,7 +616,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons 1, 0, *this, CoordinatesType(0, 1), - CoordinatesType(1, 0), args...); // Right vertical TNL::Algorithms::ParallelFor::exec(0, @@ -634,7 +624,6 @@ void Grid<2, Real, Device, Index>::forBoundary(Func func, FuncArgs... args) cons 1, dimensions.x(), *this, CoordinatesType(0, 1), - CoordinatesType(1, 0), args...); break; } diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h index 922673de5..5136e6da7 100644 --- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h +++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h @@ -31,9 +31,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid ) : grid( grid ), entityIndex( -1 ), coordinates( 0 ), - orientation( 0 ), - basis( 0 ) -{ + orientation( 0 ){ } template< int Dimension, @@ -46,14 +44,11 @@ __cuda_callable__ inline GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >:: GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation, - const EntityBasisType& basis ) + const EntityOrientationType& orientation ) : grid( grid ), entityIndex( -1 ), coordinates( coordinates ), - orientation( orientation ), - basis( basis ) -{ + orientation( orientation ){ } template< int Dimension, @@ -159,37 +154,8 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con setOrientation( const EntityOrientationType& orientation ) { this->orientation = orientation; - this->basis = EntityBasisType( 1 ) - abs( orientation ); -} - -template< int Dimension, - typename Real, - typename Device, - typename Index, - int EntityDimension, - typename Config > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::EntityBasisType& -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >:: -getBasis() const -{ - return this->basis; } -template< int Dimension, - typename Real, - typename Device, - typename Index, - int EntityDimension, - typename Config > -__cuda_callable__ inline -void -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >:: -setBasis( const EntityBasisType& basis ) -{ - this->basis = basis; - this->orientation = EntityOrientationType( 1 ) - abs( basis ); -} template< int Dimension, typename Real, @@ -270,8 +236,7 @@ __cuda_callable__ inline GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:: GridEntity( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation, - const EntityBasisType& basis ) + const EntityOrientationType& orientation) : grid( grid ), entityIndex( -1 ), coordinates( coordinates ) @@ -361,19 +326,6 @@ getOrientation() const return EntityOrientationType( ( IndexType ) 0 ); } -template< int Dimension, - typename Real, - typename Device, - typename Index, - typename Config > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::EntityBasisType -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:: -getBasis() const -{ - return EntityBasisType( ( IndexType ) 1 ); -} - template< int Dimension, typename Real, typename Device, @@ -458,7 +410,6 @@ GridEntity( const GridType& grid ) { } -// Basis is Discarded template< int Dimension, typename Real, typename Device, @@ -468,8 +419,7 @@ __cuda_callable__ inline GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >:: GridEntity( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation, - const EntityBasisType& basis ) + const EntityOrientationType& orientation) : grid( grid ), entityIndex( -1 ), coordinates( coordinates ) @@ -561,19 +511,6 @@ getOrientation() const return EntityOrientationType( ( IndexType ) 0 ); } -template< int Dimension, - typename Real, - typename Device, - typename Index, - typename Config > -__cuda_callable__ inline -const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::EntityBasisType -GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >:: -getBasis() const -{ - return EntityBasisType( ( IndexType ) 0 ); -} - template< int Dimension, typename Real, typename Device, diff --git a/src/TNL/Meshes/GridEntity.h b/src/TNL/Meshes/GridEntity.h index 19e13c531..05720681f 100644 --- a/src/TNL/Meshes/GridEntity.h +++ b/src/TNL/Meshes/GridEntity.h @@ -57,7 +57,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio constexpr static int getEntityDimension() { return EntityDimension; }; typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType; - typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; typedef typename GridType::PointType PointType; template< int NeighborEntityDimension = getEntityDimension() > @@ -74,8 +73,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio __cuda_callable__ inline GridEntity( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation, - const EntityBasisType& basis ); + const EntityOrientationType& orientation); __cuda_callable__ inline const CoordinatesType& getCoordinates() const; @@ -104,12 +102,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio __cuda_callable__ inline void setOrientation( const EntityOrientationType& orientation ); - __cuda_callable__ inline - const EntityBasisType& getBasis() const; - - __cuda_callable__ inline - void setBasis( const EntityBasisType& basis ); - __cuda_callable__ inline bool isBoundaryEntity() const; @@ -132,8 +124,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio EntityOrientationType orientation; - EntityBasisType basis; - //__cuda_callable__ inline //GridEntity(); @@ -167,7 +157,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con constexpr static int getEntityDimension() { return getMeshDimension(); }; typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType; - typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; template< int NeighborEntityDimension = getEntityDimension() > using NeighborEntities = @@ -184,8 +173,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con __cuda_callable__ inline GridEntity( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ), - const EntityBasisType& basis = EntityBasisType( ( Index ) 1 ) ); + const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ) ); __cuda_callable__ inline const CoordinatesType& getCoordinates() const; @@ -214,12 +202,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con __cuda_callable__ inline void setOrientation( const EntityOrientationType& orientation ){}; - __cuda_callable__ inline - const EntityBasisType getBasis() const; - - __cuda_callable__ inline - void setBasis( const EntityBasisType& basis ){}; - __cuda_callable__ inline bool isBoundaryEntity() const; @@ -276,7 +258,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > constexpr static int getEntityDimension() { return 0; }; typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType; - typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType; template< int NeighborEntityDimension = getEntityDimension() > using NeighborEntities = @@ -293,8 +274,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > __cuda_callable__ inline GridEntity( const GridType& grid, const CoordinatesType& coordinates, - const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ), - const EntityBasisType& basis = EntityOrientationType( ( Index ) 0 ) ); + const EntityOrientationType& orientation = EntityOrientationType( ( Index ) 0 ) ); __cuda_callable__ inline const CoordinatesType& getCoordinates() const; @@ -323,12 +303,6 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config > __cuda_callable__ inline void setOrientation( const EntityOrientationType& orientation ){}; - __cuda_callable__ inline - const EntityBasisType getBasis() const; - - __cuda_callable__ inline - void setBasis( const EntityBasisType& basis ){}; - __cuda_callable__ inline bool isBoundaryEntity() const; -- GitLab From 82acde6411221738a8a16a450e7a86370a4f4d6e Mon Sep 17 00:00:00 2001 From: hayeuyur Date: Thu, 9 Dec 2021 14:28:30 +0100 Subject: [PATCH 29/29] Optimization fixes for tnl-grid and parallel for --- .../HeatmapNDimGrid/implementation.h | 18 ++++++++------ .../HeatmapParallelFor/implementation.h | 24 ++++++++++++------- .../HeatmapTNLGrid/implementation.h | 21 +++++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h index ce9361fac..3a50efd47 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h @@ -359,11 +359,14 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c const Real hx = params.xDomainSize / (Real)grid.getDimension(0); const Real hy = params.yDomainSize / (Real)grid.getDimension(1); - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); + const Real hx_inv = 1. / (hx * hx); + const Real hy_inv = 1. / (hy * hy); auto entitiesCount = grid.getEntitiesCount(0); auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); + auto xDomainSize = params.xDomainSize; + auto yDomainSize = params.yDomainSize; + auto sigma = params.sigma; TNL::Containers::Array ux(entitiesCount), // data at step u aux(entitiesCount);// data at step u + 1 @@ -378,10 +381,10 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto position = entity.getCoordinates(); auto index = entity.getIndex(); - auto x = position[0] * hx - params.xDomainSize / 2; - auto y = position[1] * hx - params.yDomainSize / 2; + auto x = position[0] * hx - xDomainSize / 2; + auto y = position[1] * hx - yDomainSize / 2; - uxView[index] = exp(params.sigma * (x * x + y * y)); + uxView[index] = exp(sigma * (x * x + y * y)); }; const Container<2, bool> direction{ false, false }; @@ -398,9 +401,10 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto next = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { auto index = entity.getIndex(); + auto center = 2 * uxView[index]; - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - xDimension] - 2 * uxView[index] + uxView[index + xDimension]) * hy_inv; + auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv + + (uxView[index - xDimension] - center + uxView[index + xDimension]) * hy_inv; }; auto update = [=] __cuda_callable__(int i) mutable { diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h index c8ad25eb9..c16936090 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/implementation.h @@ -39,21 +39,26 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c const Real hx = params.xDomainSize / (Real)params.xSize; const Real hy = params.yDomainSize / (Real)params.ySize; - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); + const Real hx_inv = 1. / (hx * hx); + const Real hy_inv = 1. / (hy * hy); auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); auto uxView = ux.getView(), auxView = aux.getView(); + auto xSize = params.xSize; + auto xDomainSize = params.xDomainSize; + auto yDomainSize = params.yDomainSize; + auto sigma = params.sigma; + auto init = [=] __cuda_callable__(int i, int j) mutable { - auto index = j * params.xSize + i; + auto index = j * xSize + i; - auto x = i * hx - params.xDomainSize / 2; - auto y = j * hy - params.yDomainSize / 2; + auto x = i * hx - xDomainSize / 2.; + auto y = j * hy - yDomainSize / 2.; - uxView[index] = exp(params.sigma * (x * x + y * y)); + uxView[index] = exp(sigma * (x * x + y * y)); }; TNL::Algorithms::ParallelFor2D::exec(1, 1, params.xSize - 1, params.ySize - 1, init); @@ -63,10 +68,11 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters ¶ms) c auto next = [=] __cuda_callable__(int i, int j) mutable { - auto index = j * params.xSize + i; + auto index = j * xSize + i; + auto center = 2 * uxView[index]; - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - params.xSize] - 2 * uxView[index] + uxView[index + params.xSize]) * hy_inv; + auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv + + (uxView[index - xSize] - center + uxView[index + xSize]) * hy_inv; }; auto update = [=] __cuda_callable__(int i) mutable diff --git a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h index be4d1a13d..f50b9546a 100644 --- a/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h +++ b/src/Benchmarks/HeatEquationGrid/HeatmapTNLGrid/implementation.h @@ -26,21 +26,25 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters& params) c const Real hx = params.xDomainSize / (Real)params.xSize; const Real hy = params.yDomainSize / (Real)params.ySize; - const Real hx_inv = 1 / (hx * hx); - const Real hy_inv = 1 / (hy * hy); + const Real hx_inv = 1. / (hx * hx); + const Real hy_inv = 1. / (hy * hy); auto timestep = params.timeStep ? params.timeStep : std::min(hx * hx, hy * hy); auto uxView = ux.getView(), auxView = aux.getView(); + auto xDomainSize = params.xDomainSize; + auto yDomainSize = params.yDomainSize; + auto sigma = params.sigma; + auto init = [=] __cuda_callable__(const typename Grid2D::EntityType<0> &entity) mutable { auto index = entity.getIndex(); - auto x = entity.getCoordinates().x() * hx - params.xDomainSize / 2; - auto y = entity.getCoordinates().y() * hy - params.yDomainSize / 2; + auto x = entity.getCoordinates().x() * hx - xDomainSize / 2.; + auto y = entity.getCoordinates().y() * hy - yDomainSize / 2.; - uxView[index] = exp(params.sigma * (x * x + y * y)); + uxView[index] = exp(sigma * (x * x + y * y)); }; grid.template forInterior<0>(init); @@ -48,12 +52,13 @@ bool HeatmapSolver::solve(const HeatmapSolver::Parameters& params) c if (!writeGNUPlot("data.txt", params, ux)) return false; + auto width = grid.getDimensions().x() + 1; auto next = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { auto index = entity.getIndex(); - auto width = grid.getDimensions().x() + 1; + auto center = 2 * uxView[index]; - auxView[index] = (uxView[index - 1] - 2 * uxView[index] + uxView[index + 1]) * hx_inv + - (uxView[index - width] - 2 * uxView[index] + uxView[index + width]) * hy_inv; + auxView[index] = (uxView[index - 1] - center + uxView[index + 1]) * hx_inv + + (uxView[index - width] - center + uxView[index + width]) * hy_inv; }; auto update = [=] __cuda_callable__(const typename Grid2D::EntityType<0>&entity) mutable { -- GitLab