Loading src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h +1 −1 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig( { TNL::Config::ConfigDescription config; config.addEntry<TNL::String>("device", "Device the computation will run on.", "host"); config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda"); config.addEntryEnum<TNL::String>("host"); #ifdef HAVE_CUDA Loading src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h +22 −9 Original line number Diff line number Diff line Loading @@ -37,6 +37,13 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { config.addEntryEnum("append"); config.addEntryEnum("overwrite"); config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda"); config.addEntryEnum<TNL::String>("host"); #ifdef HAVE_CUDA config.addEntryEnum<TNL::String>("cuda"); #endif config.addEntry<TNL::String>("precision", "Precision of the arithmetics.", "double"); config.addEntryEnum("float"); config.addEntryEnum("double"); Loading Loading @@ -77,7 +84,7 @@ void HeatmapSolverBenchmark::exec(const typename HeatmapSolver<Real>::Parameters auto result = solver.template solve<Device>(params); if (!result) std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl; printf("Fail to solve for grid size (%d,%d)", params.xSize, params.ySize); } template<typename Real, typename Device> Loading Loading @@ -169,16 +176,22 @@ int main(int argc, char* argv[]) { std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); auto device = parameters.getParameter<TNL::String>("device"); if (device == "host") { if(precision == "all" || precision == "float") solver.runBenchmark<float, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); if(precision == "all" || precision == "double") solver.runBenchmark<double, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); } #ifdef HAVE_CUDA if (device == "cuda") { if( precision == "all" || precision == "float" ) solver.runBenchmark<float, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); if( precision == "all" || precision == "double" ) solver.runBenchmark<double, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); } #endif return EXIT_SUCCESS; Loading src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h +22 −18 Original line number Diff line number Diff line Loading @@ -204,10 +204,10 @@ class Grid { traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); auto outerFunction = [=] __cuda_callable__(Index offset, const Container<Dimension, Index>& traverseRectOrigin, const Container<Dimension, Index>& traverseRectDimensions, const Container<Dimension, Index>& traverseRectDimensionsProducts, const Container<Dimension, Index>& dimensionsProducts, const Container<Dimension, Index> traverseRectOrigin, const Container<Dimension, Index> traverseRectDimensions, const Container<Dimension, Index> traverseRectDimensionsProducts, const Container<Dimension, Index> dimensionsProducts, FunctionArgs... args) mutable { auto entity = this -> makeEntitity(offset, traverseRectOrigin, traverseRectDimensions, Loading Loading @@ -237,8 +237,8 @@ class Grid { * * For example, let's have a 3-d grid, then the map indexing will be the next: * 0 - 0 - count of vertices * 1, 2, 3 - count of edges in z, y, x plane * 4, 5, 6 - count of faces in yz, xz, xy plane * 1, 2, 3 - count of edges in x, y, z plane * 4, 5, 6 - count of faces in xy, yz, zy plane * 7 - count of cells in z y x plane * * @warning - The ordering of is lexigraphical. Loading Loading @@ -268,7 +268,7 @@ class Grid { int result = 1; for (std::size_t k = 0; k < combinationBuffer.size(); k++) result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; result *= combinationBuffer[k] ? dimensions[Dimension - k - 1] - 1 : dimensions[Dimension - k - 1]; dimensionMap[j] = result; cumulativeDimensionMap[i] += result; Loading @@ -279,12 +279,13 @@ class Grid { } __cuda_callable__ inline GridEntity<Dimension, Index> makeEntitity(const Index index, GridEntity<Dimension, Index> makeEntitity(const Index& index, const Container<Dimension, Index>& traverseRectOrigin, const Container<Dimension, Index>& traverseRectDimensions, const Container<Dimension, Index>& traverseRectDimensionsProducts, const Container<Dimension, Index>& dimensionsProducts) const { const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); //Container<Dimension, Index> traverseCoordinates = 0; Container<Dimension, Index> traverseCoordinates = getCoordinates(index, traverseRectDimensions); Container<Dimension, Index> globalCoordinates = 0; for (Index i = 0; i < Dimension; i++) Loading @@ -298,22 +299,23 @@ class Grid { * Calculates position in the specific boundaries */ __cuda_callable__ inline Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const { Container<Dimension, Index> getCoordinates(const Index& index, const Container<Dimension, Index> &dimensions) const { Container<Dimension, Index> coordinates = 0; Index tmpIndex = index; Index dim = Dimension - 1; Index dimensionIndex = 0; // TODO: - Implement overflow check. while (tmpIndex) { Index dimension = dimensions[dim], while (tmpIndex && dimensionIndex < Dimension) { Index dimension = dimensions[dimensionIndex], quotient = tmpIndex / dimension, reminder = tmpIndex - (dimension * quotient); coordinates[dim] = reminder; coordinates[dimensionIndex] = reminder; tmpIndex = quotient; dim -= 1; dimensionIndex += 1; } return coordinates; Loading @@ -325,8 +327,10 @@ class Grid { Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept { Container<Dimension, Index> products = 0; for (Index i = Dimension; i > 0; i--) products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; products[0] = 1; for (Index i = 1; i < Dimension; i++) products[i] = dimensions[i - 1] * products[i - 1]; return products; } Loading Loading @@ -370,7 +374,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters ¶ms) c ux = 0; aux = 0; auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { auto init = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { auto position = entity.getCoordinates(); auto index = entity.getIndex(); Loading src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt +2 −2 Original line number Diff line number Diff line if (BUILD_CUDA) CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) ELSE() add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) ENDIF() src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h +4 −2 Original line number Diff line number Diff line Loading @@ -18,16 +18,18 @@ int main(int argc, char *argv[]) { if (!parseCommandLine(argc, argv, config, parameters)) return EXIT_FAILURE; parameters.addParameter("outputData", true); auto device = parameters.getParameter<TNL::String>("device"); auto params = HeatmapSolver<Real>::Parameters(parameters); HeatmapSolver<Real> solver; if (device == "host" && !solver.solve<TNL::Devices::Host>(params)) if (device == "host" && !solver.template solve<TNL::Devices::Host>(params)) return EXIT_FAILURE; #ifdef HAVE_CUDA if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params)) if (device == "cuda" && !solver.template solve<TNL::Devices::Cuda>(params)) return EXIT_FAILURE; #endif Loading Loading
src/Benchmarks/HeatEquationGrid/Base/HeatmapSolver.h +1 −1 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ TNL::Config::ConfigDescription HeatmapSolver<Real>::Parameters::makeInputConfig( { TNL::Config::ConfigDescription config; config.addEntry<TNL::String>("device", "Device the computation will run on.", "host"); config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda"); config.addEntryEnum<TNL::String>("host"); #ifdef HAVE_CUDA Loading
src/Benchmarks/HeatEquationGrid/Base/HeatmapSolverBenchmark.h +22 −9 Original line number Diff line number Diff line Loading @@ -37,6 +37,13 @@ TNL::Config::ConfigDescription HeatmapSolverBenchmark::makeInputConfig() { config.addEntryEnum("append"); config.addEntryEnum("overwrite"); config.addEntry<TNL::String>("device", "Device the computation will run on.", "cuda"); config.addEntryEnum<TNL::String>("host"); #ifdef HAVE_CUDA config.addEntryEnum<TNL::String>("cuda"); #endif config.addEntry<TNL::String>("precision", "Precision of the arithmetics.", "double"); config.addEntryEnum("float"); config.addEntryEnum("double"); Loading Loading @@ -77,7 +84,7 @@ void HeatmapSolverBenchmark::exec(const typename HeatmapSolver<Real>::Parameters auto result = solver.template solve<Device>(params); if (!result) std::cout << "Fail to solve for grid size (" << params.xSize << ", " << params.ySize << ")" << std::endl; printf("Fail to solve for grid size (%d,%d)", params.xSize, params.ySize); } template<typename Real, typename Device> Loading Loading @@ -169,16 +176,22 @@ int main(int argc, char* argv[]) { std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); auto device = parameters.getParameter<TNL::String>("device"); if (device == "host") { if(precision == "all" || precision == "float") solver.runBenchmark<float, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); if(precision == "all" || precision == "double") solver.runBenchmark<double, TNL::Devices::Host>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); } #ifdef HAVE_CUDA if (device == "cuda") { if( precision == "all" || precision == "float" ) solver.runBenchmark<float, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); if( precision == "all" || precision == "double" ) solver.runBenchmark<double, TNL::Devices::Cuda>(benchmark, minXDimension, maxXDimension, xSizeStepFactor, minYDimension, maxYDimension, ySizeStepFactor, parameters); } #endif return EXIT_SUCCESS; Loading
src/Benchmarks/HeatEquationGrid/HeatmapNDimGrid/implementation.h +22 −18 Original line number Diff line number Diff line Loading @@ -204,10 +204,10 @@ class Grid { traverseRectDimensionsProducts = getDimensionProducts(traverseRectDimensions); auto outerFunction = [=] __cuda_callable__(Index offset, const Container<Dimension, Index>& traverseRectOrigin, const Container<Dimension, Index>& traverseRectDimensions, const Container<Dimension, Index>& traverseRectDimensionsProducts, const Container<Dimension, Index>& dimensionsProducts, const Container<Dimension, Index> traverseRectOrigin, const Container<Dimension, Index> traverseRectDimensions, const Container<Dimension, Index> traverseRectDimensionsProducts, const Container<Dimension, Index> dimensionsProducts, FunctionArgs... args) mutable { auto entity = this -> makeEntitity(offset, traverseRectOrigin, traverseRectDimensions, Loading Loading @@ -237,8 +237,8 @@ class Grid { * * For example, let's have a 3-d grid, then the map indexing will be the next: * 0 - 0 - count of vertices * 1, 2, 3 - count of edges in z, y, x plane * 4, 5, 6 - count of faces in yz, xz, xy plane * 1, 2, 3 - count of edges in x, y, z plane * 4, 5, 6 - count of faces in xy, yz, zy plane * 7 - count of cells in z y x plane * * @warning - The ordering of is lexigraphical. Loading Loading @@ -268,7 +268,7 @@ class Grid { int result = 1; for (std::size_t k = 0; k < combinationBuffer.size(); k++) result *= combinationBuffer[k] ? dimensions[k] - 1 : dimensions[k]; result *= combinationBuffer[k] ? dimensions[Dimension - k - 1] - 1 : dimensions[Dimension - k - 1]; dimensionMap[j] = result; cumulativeDimensionMap[i] += result; Loading @@ -279,12 +279,13 @@ class Grid { } __cuda_callable__ inline GridEntity<Dimension, Index> makeEntitity(const Index index, GridEntity<Dimension, Index> makeEntitity(const Index& index, const Container<Dimension, Index>& traverseRectOrigin, const Container<Dimension, Index>& traverseRectDimensions, const Container<Dimension, Index>& traverseRectDimensionsProducts, const Container<Dimension, Index>& dimensionsProducts) const { const auto traverseCoordinates = getCoordinates(index, traverseRectDimensions); //Container<Dimension, Index> traverseCoordinates = 0; Container<Dimension, Index> traverseCoordinates = getCoordinates(index, traverseRectDimensions); Container<Dimension, Index> globalCoordinates = 0; for (Index i = 0; i < Dimension; i++) Loading @@ -298,22 +299,23 @@ class Grid { * Calculates position in the specific boundaries */ __cuda_callable__ inline Container<Dimension, Index> getCoordinates(const Index index, const Container<Dimension, Index> &dimensions) const { Container<Dimension, Index> getCoordinates(const Index& index, const Container<Dimension, Index> &dimensions) const { Container<Dimension, Index> coordinates = 0; Index tmpIndex = index; Index dim = Dimension - 1; Index dimensionIndex = 0; // TODO: - Implement overflow check. while (tmpIndex) { Index dimension = dimensions[dim], while (tmpIndex && dimensionIndex < Dimension) { Index dimension = dimensions[dimensionIndex], quotient = tmpIndex / dimension, reminder = tmpIndex - (dimension * quotient); coordinates[dim] = reminder; coordinates[dimensionIndex] = reminder; tmpIndex = quotient; dim -= 1; dimensionIndex += 1; } return coordinates; Loading @@ -325,8 +327,10 @@ class Grid { Container<Dimension, Index> getDimensionProducts(const Container<Dimension, Index>& dimensions) const noexcept { Container<Dimension, Index> products = 0; for (Index i = Dimension; i > 0; i--) products[i - 1] = i == Dimension ? 1 : products[i] * dimensions[i]; products[0] = 1; for (Index i = 1; i < Dimension; i++) products[i] = dimensions[i - 1] * products[i - 1]; return products; } Loading Loading @@ -370,7 +374,7 @@ bool HeatmapSolver<Real>::solve(const HeatmapSolver<Real>::Parameters ¶ms) c ux = 0; aux = 0; auto init = [=] __cuda_callable__(GridEntity<2, int> entity) mutable { auto init = [=] __cuda_callable__(const GridEntity<2, int>& entity) mutable { auto position = entity.getCoordinates(); auto index = entity.getIndex(); Loading
src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/CMakeLists.txt +2 −2 Original line number Diff line number Diff line if (BUILD_CUDA) CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cu ) CUDA_ADD_EXECUTABLE( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cu ) ELSE() add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h ../../Base/HeatmapSolverBenchmark.h main.h main.cpp ) add_executable( heat_parallel_for_grid ../implementation.h ../../Base/HeatmapSolver.h main.h main.cpp ) ENDIF()
src/Benchmarks/HeatEquationGrid/HeatmapParallelFor/solution/main.h +4 −2 Original line number Diff line number Diff line Loading @@ -18,16 +18,18 @@ int main(int argc, char *argv[]) { if (!parseCommandLine(argc, argv, config, parameters)) return EXIT_FAILURE; parameters.addParameter("outputData", true); auto device = parameters.getParameter<TNL::String>("device"); auto params = HeatmapSolver<Real>::Parameters(parameters); HeatmapSolver<Real> solver; if (device == "host" && !solver.solve<TNL::Devices::Host>(params)) if (device == "host" && !solver.template solve<TNL::Devices::Host>(params)) return EXIT_FAILURE; #ifdef HAVE_CUDA if (device == "cuda" && !solver.solve<TNL::Devices::Cuda>(params)) if (device == "cuda" && !solver.template solve<TNL::Devices::Cuda>(params)) return EXIT_FAILURE; #endif Loading