diff --git a/Documentation/Doxyfile b/Documentation/Doxyfile index 58f4b4b1f5dd14df7d91fabad213d05e09fcf08e..8b3faf7032228a2d66bdb75a1784db70e0e14a07 100644 --- a/Documentation/Doxyfile +++ b/Documentation/Doxyfile @@ -2123,6 +2123,7 @@ INCLUDE_FILE_PATTERNS = PREDEFINED = DOXYGEN_ONLY PREDEFINED += HAVE_MPI PREDEFINED += HAVE_CUDA +PREDEFINED += HAVE_HYPRE # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/Documentation/Pages/main-page.md b/Documentation/Pages/main-page.md index 8ad6e6d80bbce5da120f208e30e6d9e5a0e3a9e4..32d97d75f47c039c34356a4ae9b777df4352aa69 100644 --- a/Documentation/Pages/main-page.md +++ b/Documentation/Pages/main-page.md @@ -178,6 +178,11 @@ computing platform, and (optionally) some libraries. <td> </td> <td> Only used for the compilation of the `tnl-decompose-mesh` tool. </td> </tr> + <tr><td> [Hypre](https://github.com/hypre-space/hypre) </td> + <td> \ref Hypre "Wrappers for Hypre solvers" </td> + <td> `-DHAVE_HYPRE -lHYPRE` </td> + <td> Attention should be paid to Hypre build options, e.g. `--enable-bigint`. </td> + </tr> <tr><td> [libpng](http://www.libpng.org/pub/png/libpng.html) </td> <td> \ref TNL::Images "Image processing" classes </td> <td> `-DHAVE_PNG_H -lpng` </td> diff --git a/cmake/FindHypre.cmake b/cmake/FindHypre.cmake new file mode 100644 index 0000000000000000000000000000000000000000..4b87af73902ecfb65f415ae2edc392165e5ab086 --- /dev/null +++ b/cmake/FindHypre.cmake @@ -0,0 +1,78 @@ +################################################################################ +# +# \file FindHypre.cmake +# \copyright 2012-2015 J. Bakosi, +# 2016-2018 Los Alamos National Security, LLC., +# 2019-2021 Triad National Security, LLC. +# All rights reserved. See the LICENSE file for details. +# \brief Find the Hypre library from LLNL +# +################################################################################ + +# Hypre: https://github.com/LLNL/hypre +# +# HYPRE_FOUND - System has Hypre +# HYPRE_INCLUDE_DIRS - The Hypre include directory +# HYPRE_LIBRARIES - The libraries needed to use Hypre +# +# Set HYPRE_ROOT before calling find_package to a path to add an additional +# search path, e.g., +# +# Usage: +# +# set(HYPRE_ROOT "/path/to/custom/hypre") # prefer over system +# find_package(Hypre) +# if(HYPRE_FOUND) +# target_link_libraries (TARGET ${HYPRE_LIBRARIES}) +# endif() + +function(_HYPRE_GET_VERSION _OUT_ver _version_hdr) + file(STRINGS ${_version_hdr} _contents REGEX "#define HYPRE_RELEASE_VERSION[ \t]+") + if(_contents) + string(REGEX REPLACE "\"" "" _cont "${_contents}") + string(REGEX REPLACE ".*#define HYPRE_RELEASE_VERSION[ \t]+([0-9.]+).*" "\\1" ${_OUT_ver} "${_cont}") + if(NOT ${${_OUT_ver}} MATCHES "[0-9]+") + message(FATAL_ERROR "Version parsing failed for HYPRE_RELEASE_VERSION in ${_version_hdr}!") + endif() + set(${_OUT_ver} ${${_OUT_ver}} PARENT_SCOPE) + elseif(EXISTS ${_version_hdr}) + message(FATAL_ERROR "No HYPRE_RELEASE_VERSION in ${_version_hdr}") + else() + message(FATAL_ERROR "Include file ${_version_hdr} does not exist") + endif() +endfunction() + +# If already in cache, be silent +if(HYPRE_INCLUDE_DIRS AND HYPRE_LIBRARIES) + set (HYPRE_FIND_QUIETLY TRUE) +endif() + +if (HYPRE_ROOT) + set(HYPRE_SEARCH_OPTS NO_DEFAULT_PATH) +else() + set(HYPRE_ROOT "/usr") +endif() + +find_path(HYPRE_INCLUDE_DIR NAMES HYPRE.h + PATH_SUFFIXES hypre + HINTS ${HYPRE_ROOT}/include ${HYPRE_ROOT}/include/hypre + ${HYPRE_SEARCH_OPTS}) + +if(HYPRE_INCLUDE_DIR) + _HYPRE_GET_VERSION(HYPRE_VERSION ${HYPRE_INCLUDE_DIR}/HYPRE_config.h) + set(HYPRE_INCLUDE_DIRS ${HYPRE_INCLUDE_DIR}) +else() + set(HYPRE_VERSION 0.0.0) + set(HYPRE_INCLUDE_DIRS "") +endif() + +find_library(HYPRE_LIBRARY NAMES HYPRE HINTS ${HYPRE_ROOT}/lib) + +set(HYPRE_LIBRARIES ${HYPRE_LIBRARY}) + +# Handle the QUIETLY and REQUIRED arguments and set HYPRE_FOUND to TRUE if +# all listed variables are TRUE. +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(Hypre REQUIRED_VARS HYPRE_LIBRARIES HYPRE_INCLUDE_DIRS VERSION_VAR HYPRE_VERSION) + +MARK_AS_ADVANCED(HYPRE_INCLUDE_DIRS HYPRE_LIBRARIES) diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt index 1e8a400bcbbc68dd3aec2b06bd4592372c05e13c..11ce01f0ed78ef144af39241607677d91c6063dc 100644 --- a/src/Examples/CMakeLists.txt +++ b/src/Examples/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory( Hypre ) + #add_subdirectory( simple-examples ) #add_subdirectory( Hamilton-Jacobi ) #add_subdirectory( heat-equation ) diff --git a/src/Examples/Hypre/CMakeLists.txt b/src/Examples/Hypre/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c9eb9184bf5cb46574592bea7d8e3357f64f8b5 --- /dev/null +++ b/src/Examples/Hypre/CMakeLists.txt @@ -0,0 +1,13 @@ +if( ${BUILD_MPI} ) + find_package(Hypre) + if( ${HYPRE_FOUND} ) + foreach( source IN ITEMS hypre-ex5.c tnl-hypre-ex5.cpp ) + string( REGEX REPLACE "\.cpp|\.c" "" target ${source} ) + add_executable( ${target} ${source} ) + target_compile_definitions( ${target} PUBLIC "-DHAVE_HYPRE" ) + target_include_directories( ${target} PUBLIC ${HYPRE_INCLUDE_DIRS} ) + target_link_libraries( ${target} ${HYPRE_LIBRARIES} -lm ) + install( TARGETS ${target} RUNTIME DESTINATION bin ) + endforeach() + endif() +endif() diff --git a/src/Examples/Hypre/ex.h b/src/Examples/Hypre/ex.h new file mode 100644 index 0000000000000000000000000000000000000000..8b2324c1700b1b8a23d9e34cba84c1808dbf13e1 --- /dev/null +++ b/src/Examples/Hypre/ex.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/*-------------------------------------------------------------------------- + * Header file for examples + *--------------------------------------------------------------------------*/ + +#ifndef HYPRE_EXAMPLES_INCLUDES +#define HYPRE_EXAMPLES_INCLUDES + +#include <HYPRE_config.h> + +#if defined(HYPRE_EXAMPLE_USING_CUDA) + +#include <cuda_runtime.h> + +#ifndef HYPRE_USING_UNIFIED_MEMORY +#error *** Running the examples on GPUs requires Unified Memory. Please reconfigure and rebuild with --enable-unified-memory *** +#endif + +static inline void* +gpu_malloc(size_t size) +{ + void *ptr = NULL; + cudaMallocManaged(&ptr, size, cudaMemAttachGlobal); + return ptr; +} + +static inline void* +gpu_calloc(size_t num, size_t size) +{ + void *ptr = NULL; + cudaMallocManaged(&ptr, num * size, cudaMemAttachGlobal); + cudaMemset(ptr, 0, num * size); + return ptr; +} + +#define malloc(size) gpu_malloc(size) +#define calloc(num, size) gpu_calloc(num, size) +#define free(ptr) ( cudaFree(ptr), ptr = NULL ) +#endif /* #if defined(HYPRE_EXAMPLE_USING_CUDA) */ +#endif /* #ifndef HYPRE_EXAMPLES_INCLUDES */ + diff --git a/src/Examples/Hypre/glvis-ex5.sh b/src/Examples/Hypre/glvis-ex5.sh new file mode 100755 index 0000000000000000000000000000000000000000..403dcf447772edf7ed6ddb484c7abafa3307dcfa --- /dev/null +++ b/src/Examples/Hypre/glvis-ex5.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +ex=ex5 +keys=Aaamc + +dir="${1:-.}" +mesh="$dir/$ex.mesh" +sol="$dir/$ex.sol" + +if ! test -e "$mesh"; then + echo "Error: cannot find mesh file for $ex" + exit 1 +fi + +echo "FiniteElementSpace" > "$sol" +echo "FiniteElementCollection: H1_2D_P1" >> "$sol" +echo "VDim: 1" >> "$sol" +echo "Ordering: 0" >> "$sol" +echo "" >> "$sol" +find "$dir" -name "$ex.sol.??????" | sort | xargs cat >> "$sol" + +glvis -m "$mesh" -g "$sol" -k "$keys" diff --git a/src/Examples/Hypre/hypre-ex5.c b/src/Examples/Hypre/hypre-ex5.c new file mode 100644 index 0000000000000000000000000000000000000000..79c3128afa363e8474fa8c7d9b99df6c931f110e --- /dev/null +++ b/src/Examples/Hypre/hypre-ex5.c @@ -0,0 +1,674 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* + Example 5 + + Interface: Linear-Algebraic (IJ) + + Compile with: use the install script from TNL + + Sample run: OMP_NUM_THREADS=1 mpirun -np 4 hypre-ex5 + + Description: + + This example solves the 2-D Laplacian problem with zero boundary conditions + on an n x n grid. The number of unknowns is N=n^2. The standard 5-point + stencil is used, and we solve for the interior nodes only. + + The example is based on Hypre's Example 5 and its modification "ex5big" that + illustrates the 64-bit integer support in Hypre needed to run problems with + more than 2B unknowns. We recommend comparing this example with both + examples in Hypre: + + - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5.c + - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5big.c + + To enable the 64-bit integer support, you need to build Hypre with the + --enable-bigint option of the configure script. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include <HYPRE_krylov.h> +#include <HYPRE.h> +#include <HYPRE_parcsr_ls.h> + +#include "ex.h" +#include "vis.c" + +int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations, + double rel_residual_norm); + +#define my_min(a,b) (((a)<(b)) ? (a) : (b)) + +int main (int argc, char *argv[]) +{ + HYPRE_Int i; + int myid, num_procs; + int N, n; + + HYPRE_Int ilower, iupper; + HYPRE_Int local_size, extra; + + int solver_id; + int vis, print_system; + + double h, h2; + + HYPRE_IJMatrix A; + HYPRE_ParCSRMatrix parcsr_A; + HYPRE_IJVector b; + HYPRE_ParVector par_b; + HYPRE_IJVector x; + HYPRE_ParVector par_x; + + HYPRE_Solver solver, precond; + + /* Initialize MPI */ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + + /* Initialize HYPRE */ + HYPRE_Init(); + + /* Print GPU info */ + /* HYPRE_PrintDeviceInfo(); */ +#if defined(HYPRE_USING_GPU) + /* use cuSPARSE for SpGEMM */ + HYPRE_SetSpGemmUseCusparse(0); +#endif + + /* Default problem parameters */ + n = 33; + solver_id = 0; + vis = 0; + print_system = 0; + + + /* Parse command line */ + { + int arg_index = 0; + int print_usage = 0; + + while (arg_index < argc) + { + if ( strcmp(argv[arg_index], "-n") == 0 ) + { + arg_index++; + n = atoi(argv[arg_index++]); + } + else if ( strcmp(argv[arg_index], "-solver") == 0 ) + { + arg_index++; + solver_id = atoi(argv[arg_index++]); + } + else if ( strcmp(argv[arg_index], "-vis") == 0 ) + { + arg_index++; + vis = 1; + } + else if ( strcmp(argv[arg_index], "-print_system") == 0 ) + { + arg_index++; + print_system = 1; + } + else if ( strcmp(argv[arg_index], "-help") == 0 ) + { + print_usage = 1; + break; + } + else + { + arg_index++; + } + } + + if ((print_usage) && (myid == 0)) + { + printf("\n"); + printf("Usage: %s [<options>]\n", argv[0]); + printf("\n"); + printf(" -n <n> : problem size in each direction (default: 33)\n"); + printf(" -solver <ID> : solver ID\n"); + printf(" 0 - AMG (default) \n"); + printf(" 1 - AMG-PCG\n"); + printf(" 8 - ParaSails-PCG\n"); + printf(" 50 - PCG\n"); + printf(" 61 - AMG-FlexGMRES\n"); + printf(" -vis : save the solution for GLVis visualization\n"); + printf(" -print_system : print the matrix and rhs\n"); + printf("\n"); + } + + if (print_usage) + { + HYPRE_Finalize(); + MPI_Finalize(); + return (0); + } + } + + /* Preliminaries: want at least one processor per row */ + if (n * n < num_procs) { n = sqrt(num_procs) + 1; } + N = n * n; /* global number of rows */ + h = 1.0 / (n + 1); /* mesh size*/ + h2 = h * h; + + /* Each processor knows only of its own rows - the range is denoted by ilower + and upper. Here we partition the rows. We account for the fact that + N may not divide evenly by the number of processors. */ + local_size = N / num_procs; + extra = N - local_size * num_procs; + + ilower = local_size * myid; + ilower += my_min(myid, extra); + + iupper = local_size * (myid + 1); + iupper += my_min(myid + 1, extra); + iupper = iupper - 1; + + /* How many rows do I have? */ + local_size = iupper - ilower + 1; + + /* Create the matrix. + Note that this is a square matrix, so we indicate the row partition + size twice (since number of rows = number of cols) */ + HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); + + /* Choose a parallel csr format storage (see the User's Manual) */ + HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); + + /* Initialize before setting coefficients */ + HYPRE_IJMatrixInitialize(A); + + /* Now go through my local rows and set the matrix entries. + Each row has at most 5 entries. For example, if n=3: + + A = [M -I 0; -I M -I; 0 -I M] + M = [4 -1 0; -1 4 -1; 0 -1 4] + + Note that here we are setting one row at a time, though + one could set all the rows together (see the User's Manual). + */ + { + int nnz; + /* OK to use constant-length arrays for CPUs + double values[5]; + HYPRE_Int cols[5]; + */ + double *values = (double *) malloc(5 * sizeof(double)); + HYPRE_Int *cols = (HYPRE_Int *) malloc(5 * sizeof(HYPRE_Int)); + HYPRE_Int *tmp = (HYPRE_Int *) malloc(2 * sizeof(HYPRE_Int)); + + for (i = ilower; i <= iupper; i++) + { + nnz = 0; + + /* The left identity block:position i-n */ + if ((i - n) >= 0) + { + cols[nnz] = i - n; + values[nnz] = -1.0; + nnz++; + } + + /* The left -1: position i-1 */ + if (i % n) + { + cols[nnz] = i - 1; + values[nnz] = -1.0; + nnz++; + } + + /* Set the diagonal: position i */ + cols[nnz] = i; + values[nnz] = 4.0; + nnz++; + + /* The right -1: position i+1 */ + if ((i + 1) % n) + { + cols[nnz] = i + 1; + values[nnz] = -1.0; + nnz++; + } + + /* The right identity block:position i+n */ + if ((i + n) < N) + { + cols[nnz] = i + n; + values[nnz] = -1.0; + nnz++; + } + + /* Set the values for row i */ + tmp[0] = nnz; + tmp[1] = i; + HYPRE_IJMatrixSetValues(A, 1, &tmp[0], &tmp[1], cols, values); + } + + free(values); + free(cols); + free(tmp); + } + + /* Assemble after setting the coefficients */ + HYPRE_IJMatrixAssemble(A); + + /* Note: for the testing of small problems, one may wish to read + in a matrix in IJ format (for the format, see the output files + from the -print_system option). + In this case, one would use the following routine: + HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD, + HYPRE_PARCSR, &A ); + <filename> = IJ.A.out to read in what has been printed out + by -print_system (processor numbers are omitted). + A call to HYPRE_IJMatrixRead is an *alternative* to the + following sequence of HYPRE_IJMatrix calls: + Create, SetObjectType, Initialize, SetValues, and Assemble + */ + + + /* Get the parcsr matrix object to use */ + HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); + + + /* Create the rhs and solution */ + HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &b); + HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); + HYPRE_IJVectorInitialize(b); + + HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &x); + HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); + HYPRE_IJVectorInitialize(x); + + /* Set the rhs values to h^2 and the solution to zero */ + { + double *rhs_values, *x_values; + HYPRE_Int *rows; + + rhs_values = (double*) calloc(local_size, sizeof(double)); + x_values = (double*) calloc(local_size, sizeof(double)); + rows = (HYPRE_Int*) calloc(local_size, sizeof(HYPRE_Int)); + + for (i = 0; i < local_size; i++) + { + rhs_values[i] = h2; + x_values[i] = 0.0; + rows[i] = ilower + i; + } + + HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); + HYPRE_IJVectorSetValues(x, local_size, rows, x_values); + + free(x_values); + free(rhs_values); + free(rows); + } + + + HYPRE_IJVectorAssemble(b); + /* As with the matrix, for testing purposes, one may wish to read in a rhs: + HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD, + HYPRE_PARCSR, &b ); + as an alternative to the + following sequence of HYPRE_IJVectors calls: + Create, SetObjectType, Initialize, SetValues, and Assemble + */ + HYPRE_IJVectorGetObject(b, (void **) &par_b); + + HYPRE_IJVectorAssemble(x); + HYPRE_IJVectorGetObject(x, (void **) &par_x); + + + /* Print out the system - files names will be IJ.out.A.XXXXX + and IJ.out.b.XXXXX, where XXXXX = processor id */ + if (print_system) + { + HYPRE_IJMatrixPrint(A, "IJ.out.A"); + HYPRE_IJVectorPrint(b, "IJ.out.b"); + } + + + /* Choose a solver and solve the system */ + + /* AMG */ + if (solver_id == 0) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_BoomerAMGCreate(&solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ + HYPRE_BoomerAMGSetOldDefault(solver); /* Falgout coarsening with modified classical interpolation */ + HYPRE_BoomerAMGSetRelaxType(solver, 6); /* Sym. G-S/Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(solver, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ + HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ + + /* Now setup and solve! */ + HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); + HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver */ + HYPRE_BoomerAMGDestroy(solver); + } + /* PCG */ + else if (solver_id == 50) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver */ + HYPRE_ParCSRPCGDestroy(solver); + } + /* PCG with AMG preconditioner */ + else if (solver_id == 1) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now set up the AMG preconditioner and specify any parameters */ + HYPRE_BoomerAMGCreate(&precond); + HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ + HYPRE_BoomerAMGSetCoarsenType(precond, 6); + HYPRE_BoomerAMGSetOldDefault(precond); + HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(precond, 1); + HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ + HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ + + /* Set the PCG preconditioner */ + HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, + (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver and preconditioner */ + HYPRE_ParCSRPCGDestroy(solver); + HYPRE_BoomerAMGDestroy(precond); + } + /* PCG with Parasails Preconditioner */ + else if (solver_id == 8) + { + HYPRE_Int num_iterations; + double final_res_norm; + + int sai_max_levels = 1; + double sai_threshold = 0.1; + double sai_filter = 0.05; + int sai_sym = 1; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now set up the ParaSails preconditioner and specify any parameters */ + HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); + HYPRE_ParaSailsSetFilter(precond, sai_filter); + HYPRE_ParaSailsSetSym(precond, sai_sym); + HYPRE_ParaSailsSetLogging(precond, 3); + + /* Set the PCG preconditioner */ + HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, + (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destory solver and preconditioner */ + HYPRE_ParCSRPCGDestroy(solver); + HYPRE_ParaSailsDestroy(precond); + } + /* Flexible GMRES with AMG Preconditioner */ + else if (solver_id == 61) + { + HYPRE_Int num_iterations; + double final_res_norm; + int restart = 30; + int modify = 1; + + + /* Create solver */ + HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_FlexGMRESSetKDim(solver, restart); + HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */ + + + /* Now set up the AMG preconditioner and specify any parameters */ + HYPRE_BoomerAMGCreate(&precond); + HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ + HYPRE_BoomerAMGSetCoarsenType(precond, 6); + HYPRE_BoomerAMGSetOldDefault(precond); + HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(precond, 1); + HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ + HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ + + /* Set the FlexGMRES preconditioner */ + HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, + (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); + + + if (modify) + { + /* this is an optional call - if you don't call it, hypre_FlexGMRESModifyPCDefault + is used - which does nothing. Otherwise, you can define your own, similar to + the one used here */ + HYPRE_FlexGMRESSetModifyPC( + solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample); + } + + + /* Now setup and solve! */ + HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations); + HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destory solver and preconditioner */ + HYPRE_ParCSRFlexGMRESDestroy(solver); + HYPRE_BoomerAMGDestroy(precond); + + } + else + { + if (myid == 0) { printf("Invalid solver id specified.\n"); } + } + + /* Save the solution for GLVis visualization, see vis/glvis-ex5.sh */ + if (vis) + { + FILE *file; + char filename[255]; + + int nvalues = local_size; + int *rows = (int*) calloc(nvalues, sizeof(int)); + double *values = (double*) calloc(nvalues, sizeof(double)); + + for (i = 0; i < nvalues; i++) + { + rows[i] = ilower + i; + } + + /* get the local solution */ + HYPRE_IJVectorGetValues(x, nvalues, rows, values); + + sprintf(filename, "%s.%06d", "vis/ex5.sol", myid); + if ((file = fopen(filename, "w")) == NULL) + { + printf("Error: can't open output file %s\n", filename); + HYPRE_Finalize(); + MPI_Finalize(); + exit(1); + } + + /* save solution */ + for (i = 0; i < nvalues; i++) + { + fprintf(file, "%.14e\n", values[i]); + } + + fflush(file); + fclose(file); + + free(rows); + free(values); + + /* save global finite element mesh */ + if (myid == 0) + { + GLVis_PrintGlobalSquareMesh("vis/ex5.mesh", n - 1); + } + } + + /* Clean up */ + HYPRE_IJMatrixDestroy(A); + HYPRE_IJVectorDestroy(b); + HYPRE_IJVectorDestroy(x); + + /* Finalize HYPRE */ + HYPRE_Finalize(); + + /* Finalize MPI*/ + MPI_Finalize(); + + return (0); +} + +/*-------------------------------------------------------------------------- + hypre_FlexGMRESModifyPCAMGExample - + + This is an example (not recommended) + of how we can modify things about AMG that + affect the solve phase based on how FlexGMRES is doing...For + another preconditioner it may make sense to modify the tolerance.. + + *--------------------------------------------------------------------------*/ + +int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations, + double rel_residual_norm) +{ + + + if (rel_residual_norm > .1) + { + HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 10); + } + else + { + HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 1); + } + + + return 0; +} diff --git a/src/Examples/Hypre/tnl-hypre-ex5.cpp b/src/Examples/Hypre/tnl-hypre-ex5.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a55476ac1c13be243dca546879634b268daa662 --- /dev/null +++ b/src/Examples/Hypre/tnl-hypre-ex5.cpp @@ -0,0 +1,405 @@ +/** + * Description: + * + * This example solves the 2-D Laplacian problem with zero boundary conditions + * on an n x n grid. The number of unknowns is N=n^2. The standard 5-point + * stencil is used, and we solve for the interior nodes only. + * + * The example is based on the "ex5.c" example. It demonstrates how the C code + * can be ported to C++ and TNL. We recommend comparing this example the + * original one. + * + * Compile with: use the install script from TNL + * + * Sample run: OMP_NUM_THREADS=1 mpirun -np 4 tnl-hypre-ex5 + */ + +#include <cstdio> +#include <string> + +#include <TNL/Math.h> +#include <TNL/MPI.h> +#include <TNL/Hypre.h> +#include <TNL/Containers/HypreParVector.h> +#include <TNL/Matrices/HypreParCSRMatrix.h> +#include <TNL/Solvers/Linear/Hypre.h> + +#include "vis.c" + +int +hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm ); + +int +main( int argc, char* argv[] ) +{ + // Initialize MPI + TNL::MPI::ScopedInitializer mpi( argc, argv ); + const int myid = TNL::MPI::GetRank( MPI_COMM_WORLD ); + const int num_procs = TNL::MPI::GetSize( MPI_COMM_WORLD ); + + // Initialize HYPRE and set some global options, notably HYPRE_SetSpGemmUseCusparse(0); + TNL::Hypre hypre; + + // Default problem parameters + int n = 33; + int solver_id = 0; + int vis = 0; + + // Parse the command line + { + int arg_index = 0; + int print_usage = 0; + + while( arg_index < argc ) { + if( std::string( argv[ arg_index ] ) == "-n" ) { + arg_index++; + n = std::stoi( argv[ arg_index++ ] ); + } + else if( std::string( argv[ arg_index ] ) == "-solver" ) { + arg_index++; + solver_id = std::stoi( argv[ arg_index++ ] ); + } + else if( std::string( argv[ arg_index ] ) == "-vis" ) { + arg_index++; + vis = 1; + } + else if( std::string( argv[ arg_index ] ) == "-help" ) { + print_usage = 1; + break; + } + else { + arg_index++; + } + } + + if( print_usage && myid == 0 ) { + std::cerr << "\n" + "Usage: " << argv[ 0 ] << " [<options>]\n" + "\n" + " -n <n> : problem size in each direction (default: 33)\n" + " -solver <ID> : solver ID\n" + " 0 - AMG (default) \n" + " 1 - AMG-PCG\n" + " 8 - ParaSails-PCG\n" + " 50 - PCG\n" + " 61 - AMG-FlexGMRES\n" + " -vis : save the solution for GLVis visualization\n" + << std::endl; + } + + if( print_usage ) + return EXIT_SUCCESS; + } + + // Set the problem sizes + // Preliminaries: we want at least one rank per row + if( n * n < num_procs ) + n = std::sqrt( num_procs ) + 1; + const int N = n * n; // global number of rows + const double h = 1.0 / ( n + 1 ); // mesh cell size + + /* Each rank knows only of its own rows - the range is denoted by ilower + and upper. Here we partition the rows. We account for the fact that + N may not divide evenly by the number of ranks. */ + HYPRE_Int local_size = N / num_procs; + HYPRE_Int extra = N - local_size * num_procs; + + HYPRE_Int ilower = local_size * myid; + ilower += TNL::min( myid, extra ); + + HYPRE_Int iupper = local_size * ( myid + 1 ); + iupper += TNL::min( myid + 1, extra ); + iupper = iupper - 1; + + // How many rows do I have? + local_size = iupper - ilower + 1; + + // Let each rank create its local matrix in the CSR format in TNL + using CSR = TNL::Matrices::SparseMatrix< double, TNL::HYPRE_Device, HYPRE_Int >; + CSR A_local; + A_local.setDimensions( local_size, N ); + + // Allocate row capacities - this must match exactly the sparsity pattern of + // the matrix + typename CSR::RowsCapacitiesType capacities; + capacities.setSize( local_size ); + auto capacities_view = capacities.getView(); + TNL::Algorithms::ParallelFor< TNL::HYPRE_Device >::exec( ilower, iupper + 1, + [=] __cuda_callable__ ( HYPRE_Int i ) mutable + { + int nnz = 0; + + // The left identity block: position i-n + if( i - n >= 0 ) + nnz++; + + // The left -1: position i-1 + if( i % n ) + nnz++; + + // The diagonal: position i + nnz++; + + // The right -1: position i+1 + if( ( i + 1 ) % n ) + nnz++; + + // The right identity block: position i+n + if( i + n < N ) + nnz++; + + // The row index must be converted from global to local + capacities_view[ i - ilower ] = nnz; + } ); + A_local.setRowCapacities( capacities ); + + /* Now assemble the local matrix. Each row has at most 5 entries. For + * example, if n=3: + * A = [M -I 0; -I M -I; 0 -I M] + * M = [4 -1 0; -1 4 -1; 0 -1 4] + */ + A_local.forAllRows( [=] __cuda_callable__ ( typename CSR::RowView& row ) mutable { + // The row index must be converted from local to global + const HYPRE_Int i = ilower + row.getRowIndex(); + int nnz = 0; + + // The left identity block: position i-n + if( i - n >= 0 ) + row.setElement( nnz++, i - n, -1.0 ); + + // The left -1: position i-1 + if( i % n ) + row.setElement( nnz++, i - 1, -1.0 ); + + // The diagonal: position i + row.setElement( nnz++, i, 4.0 ); + + // The right -1: position i+1 + if( ( i + 1 ) % n ) + row.setElement( nnz++, i + 1, -1.0 ); + + // The right identity block: position i+n + if( i + n < N ) + row.setElement( nnz++, i + n, -1.0 ); + } ); + + // Bind the TNL matrix to HypreCSR + TNL::Matrices::HypreCSRMatrix A_local_hypre; + A_local_hypre.bind( A_local ); + + // Assemble the distributed matrix in Hypre from the local blocks + // Note that this is a square matrix, so we indicate the row partition + // size twice (since number of rows = number of cols) + using HypreParCSR = TNL::Matrices::HypreParCSRMatrix; + HypreParCSR parcsr_A = HypreParCSR::fromLocalBlocks( MPI_COMM_WORLD, N, N, {ilower, iupper + 1}, {ilower, iupper + 1}, A_local_hypre ); + + // Deallocate the local matrix since it is not needed anymore in this example + A_local_hypre.reset(); + A_local.reset(); + + + // Create the rhs and solution vectors + TNL::Containers::HypreParVector par_b; + TNL::Containers::HypreParVector par_x; + + par_b.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD ); + par_x.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD ); + + // Set the rhs values to h^2 and the solution to zero + par_b.setValue( h * h ); + par_x.setValue( 0.0 ); + + + // Choose a solver and solve the system + + // AMG + if( solver_id == 0 ) { + // Create the solver + TNL::Solvers::Linear::HypreBoomerAMG solver; + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + // NOTE: The wrapper class sets its own default options that are + // different from Hypre. The overriding settings below result in + // the same state as the hypre-ex5.c example. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( solver, 3 ); // Print solve info + parameters + HYPRE_BoomerAMGSetOldDefault( solver ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( solver, 6 ); // Sym. G-S/Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( solver, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetNumSweeps( solver, 1 ); // Sweeeps on each level + HYPRE_BoomerAMGSetTol( solver, 1e-7 ); // Convergence tolerance + HYPRE_BoomerAMGSetMaxIter( solver, 20 ); // Use as solver: max iterations + HYPRE_BoomerAMGSetAggNumLevels( solver, 0 ); // number of aggressive coarsening levels + + // Solve the linear system (calls AMG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG + else if( solver_id == 50 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Set the matrix of the linear system + solver.setMatrix( parcsr_A ); + + // Ignore errors returned from Hypre functions (e.g. when PCG does not + // converge within the maximum iterations limit). + solver.setErrorMode( solver.WARN_HYPRE_ERRORS ); + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG with AMG preconditioner + else if( solver_id == 1 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreBoomerAMG precond; + + // Set the PCG preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( precond, 1 ); // Print setup info + parameters + HYPRE_BoomerAMGSetOldDefault( precond ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( precond, 6 ); // Sym G.S./Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( precond, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetAggNumLevels( precond, 0 ); // number of aggressive coarsening levels + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG with ParaSails preconditioner + else if( solver_id == 8 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreParaSails precond( MPI_COMM_WORLD ); + + // Set the PCG preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_ParaSailsSetParams( precond, 0.1, 1 ); // threshold and max levels + HYPRE_ParaSailsSetFilter( precond, 0.05 ); + HYPRE_ParaSailsSetSym( precond, 1 ); + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // Flexible GMRES with AMG preconditioner + else if( solver_id == 61 ) { + // Create the solver + TNL::Solvers::Linear::HypreFlexGMRES solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreBoomerAMG precond; + + // Set the FlexGMRES preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_FlexGMRESSetKDim( solver, 30 ); // restart parameter + HYPRE_FlexGMRESSetMaxIter( solver, 1000 ); // max iterations + HYPRE_FlexGMRESSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_FlexGMRESSetPrintLevel( solver, 2 ); // print solve info + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( precond, 1 ); // Print setup info + parameters + HYPRE_BoomerAMGSetOldDefault( precond ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( precond, 6 ); // Sym G.S./Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( precond, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetAggNumLevels( precond, 0 ); // number of aggressive coarsening levels + + // This is an optional call - if you don't call it, + // hypre_FlexGMRESModifyPCDefault is used - which does nothing. + // Otherwise, you can define your own, similar to the one used here + HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample ); + + // Solve the linear system (calls FlexGMRES setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + else if( myid == 0 ) { + std::cerr << "Invalid solver id specified." << std::endl; + return EXIT_FAILURE; + } + + // Save the solution for GLVis visualization, see glvis-ex5.sh + if( vis ) { + char filename[ 255 ]; + sprintf( filename, "%s.%06d", "vis_tnl/ex5.sol", myid ); + FILE* file = fopen( filename, "w" ); + if( file == nullptr ) { + printf( "Error: can't open output file %s\n", filename ); + return EXIT_FAILURE; + } + + // Save the solution + const auto local_x = par_x.getConstLocalView(); + for( HYPRE_Int i = 0; i < local_size; i++ ) + fprintf( file, "%.14e\n", local_x[ i ] ); + + fflush( file ); + fclose( file ); + + // Save the global finite element mesh + if( myid == 0 ) + GLVis_PrintGlobalSquareMesh( "vis_tnl/ex5.mesh", n - 1 ); + } + + return EXIT_SUCCESS; +} + +/** + * This is an example (not recommended) of how we can modify things about AMG + * that affect the solve phase based on how FlexGMRES is doing... For another + * preconditioner it may make sense to modify the tolerance. + */ +int +hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm ) +{ + if( rel_residual_norm > .1 ) + HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 10 ); + else + HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 1 ); + return 0; +} diff --git a/src/Examples/Hypre/verify-ex5.sh b/src/Examples/Hypre/verify-ex5.sh new file mode 100755 index 0000000000000000000000000000000000000000..6ca17d194d01a4859547298e58ba622c9256db6d --- /dev/null +++ b/src/Examples/Hypre/verify-ex5.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +set -e + +# make output directories +mkdir -p vis vis_tnl + +# dbg suffix for binaries +#dbg="" +dbg="-dbg" + +# problem size: N=n^2 +n=100 + +# disable OpenMP for reproducibility +export OMP_NUM_THREADS=1 + +make_glvis() { + local dir="$1" + local sol="$dir/ex5.sol" + + echo "FiniteElementSpace" > "$sol" + echo "FiniteElementCollection: H1_2D_P1" >> "$sol" + echo "VDim: 1" >> "$sol" + echo "Ordering: 0" >> "$sol" + echo "" >> "$sol" + find "$dir" -name "ex5.sol.??????" | sort | xargs cat >> "$sol" +} + +verify() { + make_glvis vis + make_glvis vis_tnl + diff "vis/ex5.sol" "vis_tnl/ex5.sol" + rm -f vis/ex5.sol* vis_tnl/ex5.sol* +} + +# BoomerAMG +mpirun hypre-ex5$dbg -vis -n $n -solver 0 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 0 +verify + +# PCG +mpirun hypre-ex5$dbg -vis -n $n -solver 50 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 50 +verify + +# PCG with AMG +mpirun hypre-ex5$dbg -vis -n $n -solver 1 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 1 +verify + +# PCG with ParaSails +mpirun hypre-ex5$dbg -vis -n $n -solver 8 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 8 +verify + +# FlexGMRES with AMG +mpirun hypre-ex5$dbg -vis -n $n -solver 61 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 61 +verify diff --git a/src/Examples/Hypre/vis.c b/src/Examples/Hypre/vis.c new file mode 100644 index 0000000000000000000000000000000000000000..b051fb90e8e6f4bf1ddd30461d1a8482f2fccdb2 --- /dev/null +++ b/src/Examples/Hypre/vis.c @@ -0,0 +1,60 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* Save a structured n x n mesh of square elements on the unit square into a + GLVis mesh file with the given name. */ +#include <math.h> +void GLVis_PrintGlobalSquareMesh(const char *meshfile, int n) +{ + FILE *file; + + int Dim = 2; + int NumOfVertices = (n + 1) * (n + 1); + int NumOfElements = n * n; + + int i, j; + double x, y; + double h = 1.0 / n; + + if ((file = fopen(meshfile, "w")) == NULL) + { + printf("Error: can't open output file %s\n", meshfile); + exit(1); + } + + /* mesh header */ + fprintf(file, "MFEM mesh v1.0\n"); + fprintf(file, "\ndimension\n"); + fprintf(file, "%d\n", Dim); + + /* mesh elements */ + fprintf(file, "\nelements\n"); + fprintf(file, "%d\n", NumOfElements); + for (j = 0; j < n; j++) + for (i = 0; i < n; i++) + fprintf(file, "1 3 %d %d %d %d\n", i + j * (n + 1), i + 1 + j * (n + 1), + i + 1 + (j + 1) * (n + 1), i + (j + 1) * (n + 1)); + + /* boundary will be generated by GLVis */ + fprintf(file, "\nboundary\n"); + fprintf(file, "0\n"); + + /* mesh vertices */ + fprintf(file, "\nvertices\n"); + fprintf(file, "%d\n", NumOfVertices); + fprintf(file, "%d\n", Dim); + for (j = 0; j < n + 1; j++) + for (i = 0; i < n + 1; i++) + { + x = i * h; + y = j * h; + fprintf(file, "%.14e %.14e\n", x, y); + } + + fflush(file); + fclose(file); +} diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h index e4d98a1641de3ac3f01dfad374c9779f7c15d427..6216a16113748f1d053874a9bd65d24223e796c3 100644 --- a/src/TNL/Algorithms/Segments/CSRView.h +++ b/src/TNL/Algorithms/Segments/CSRView.h @@ -29,7 +29,7 @@ public: using IndexType = std::remove_const_t< Index >; using KernelType = Kernel; using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >; - using ConstOffsetsView = typename Containers::Vector< Index, DeviceType, IndexType >::ConstViewType; + using ConstOffsetsView = typename OffsetsView::ConstViewType; using KernelView = typename Kernel::ViewType; using ViewType = CSRView; template< typename Device_, typename Index_ > @@ -167,6 +167,18 @@ public: SegmentsPrinter< CSRView, Fetch > print( Fetch&& fetch ) const; + OffsetsView + getOffsets() + { + return offsets; + } + + ConstOffsetsView + getOffsets() const + { + return offsets.getConstView(); + } + KernelType& getKernel() { diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 473c778cb2cb3b2247df9510663163e6e889ff50..b5158a5d77c1731749332be1a8423da11a40b8cd 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -134,7 +134,7 @@ template< typename Value, typename Device, typename Index, typename Allocator > std::string Array< Value, Device, Index, Allocator >::getSerializationType() { - return detail::ArrayIO< Value, Device, Index >::getSerializationType(); + return detail::ArrayIO< Value, Index, Allocator >::getSerializationType(); } template< typename Value, typename Device, typename Index, typename Allocator > @@ -579,7 +579,7 @@ operator<<( File& file, const Array< Value, Device, Index, Allocator >& array ) { using IO = detail::ArrayIO< Value, Index, Allocator >; saveObjectType( file, IO::getSerializationType() ); - const Index size = array.getSize(); + const std::size_t size = array.getSize(); file.save( &size ); IO::save( file, array.getData(), array.getSize() ); return file; @@ -603,11 +603,9 @@ operator>>( File& file, Array< Value, Device, Index, Allocator >& array ) if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." ); - Index _size; - file.load( &_size ); - if( _size < 0 ) - throw Exceptions::FileDeserializationError( file.getFileName(), "invalid array size: " + std::to_string( _size ) ); - array.setSize( _size ); + std::size_t size; + file.load( &size ); + array.setSize( size ); IO::load( file, array.getData(), array.getSize() ); return file; } diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index a166b4ca250b9f9973a44b74db1617300bc20cfb..9164e1fdedd45367f16a4ffd6c54837d485c13d4 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -355,7 +355,7 @@ operator<<( File& file, const ArrayView< Value, Device, Index > view ) { using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; saveObjectType( file, IO::getSerializationType() ); - const Index size = view.getSize(); + const std::size_t size = view.getSize(); file.save( &size ); IO::save( file, view.getData(), view.getSize() ); return file; @@ -379,11 +379,11 @@ operator>>( File& file, ArrayView< Value, Device, Index > view ) if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." ); - Index _size; - file.load( &_size ); - if( _size != view.getSize() ) + std::size_t size; + file.load( &size ); + if( size != static_cast< std::size_t >( view.getSize() ) ) throw Exceptions::FileDeserializationError( file.getFileName(), - "invalid array size: " + std::to_string( _size ) + " (expected " + "invalid array size: " + std::to_string( size ) + " (expected " + std::to_string( view.getSize() ) + ")." ); IO::load( file, view.getData(), view.getSize() ); return file; diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h index 1f40b9c4edb64d7b916082ddd6cc8d92ac33adb9..c84ec014c266ad4a8aa6369fa6fb7a9ce1dbfb56 100644 --- a/src/TNL/Containers/DistributedArray.h +++ b/src/TNL/Containers/DistributedArray.h @@ -277,6 +277,12 @@ public: void forElements( IndexType begin, IndexType end, Function&& f ) const; + void + loadFromGlobalFile( const String& fileName, bool allowCasting = false ); + + void + loadFromGlobalFile( File& file, bool allowCasting = false ); + protected: ViewType view; LocalArrayType localData; diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index 4dc65636e17fcdb1d2878bd2c5efa6e02b909be0..bf86ea9bad84abc0d00d6f96123cfc62ca695b8c 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -133,7 +133,7 @@ void DistributedArray< Value, Device, Index, Allocator >::setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer, int valuesPerElement ) { - view.setSynchronizer( synchronizer, valuesPerElement ); + view.setSynchronizer( std::move( synchronizer ), valuesPerElement ); } template< typename Value, typename Device, typename Index, typename Allocator > @@ -316,5 +316,19 @@ DistributedArray< Value, Device, Index, Allocator >::forElements( IndexType begi view.forElements( begin, end, f ); } +template< typename Value, typename Device, typename Index, typename Allocator > +void +DistributedArray< Value, Device, Index, Allocator >::loadFromGlobalFile( const String& fileName, bool allowCasting ) +{ + view.loadFromGlobalFile( fileName, allowCasting ); +} + +template< typename Value, typename Device, typename Index, typename Allocator > +void +DistributedArray< Value, Device, Index, Allocator >::loadFromGlobalFile( File& file, bool allowCasting ) +{ + view.loadFromGlobalFile( file, allowCasting ); +} + } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h index dfc7e490463204bcef6779ef817ed078f3982129..7476a7e0294897aaf63c69c094d7dcabbf8b0115 100644 --- a/src/TNL/Containers/DistributedArrayView.h +++ b/src/TNL/Containers/DistributedArrayView.h @@ -253,6 +253,12 @@ public: void forElements( IndexType begin, IndexType end, Function&& f ) const; + void + loadFromGlobalFile( const String& fileName, bool allowCasting = false ); + + void + loadFromGlobalFile( File& file, bool allowCasting = false ); + protected: LocalRangeType localRange; IndexType ghosts = 0; diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp index f995f63b0455d76e4f98dde43c1d8b8c1a2d0c74..a583d2958a34a755f79203065419c89bcda6e7b1 100644 --- a/src/TNL/Containers/DistributedArrayView.hpp +++ b/src/TNL/Containers/DistributedArrayView.hpp @@ -151,7 +151,7 @@ void DistributedArrayView< Value, Device, Index >::setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer, int valuesPerElement ) { - this->synchronizer = synchronizer; + this->synchronizer = std::move( synchronizer ); this->valuesPerElement = valuesPerElement; } @@ -377,5 +377,35 @@ DistributedArrayView< Value, Device, Index >::forElements( IndexType begin, Inde localData.forElements( localBegin, localEnd, local_f ); } +template< typename Value, typename Device, typename Index > +void +DistributedArrayView< Value, Device, Index >::loadFromGlobalFile( const String& fileName, bool allowCasting ) +{ + File file( fileName, std::ios_base::in ); + loadFromGlobalFile( file, allowCasting ); +} + +template< typename Value, typename Device, typename Index > +void +DistributedArrayView< Value, Device, Index >::loadFromGlobalFile( File& file, bool allowCasting ) +{ + using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; + const std::string type = getObjectType( file ); + const auto parsedType = parseObjectType( type ); + + if( ! allowCasting && type != IO::getSerializationType() ) + throw Exceptions::FileDeserializationError( + file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." ); + + std::size_t elementsInFile; + file.load( &elementsInFile ); + + if( allowCasting ) + IO::loadSubrange( + file, elementsInFile, localRange.getBegin(), localData.getData(), localData.getSize(), parsedType[ 1 ] ); + else + IO::loadSubrange( file, elementsInFile, localRange.getBegin(), localData.getData(), localData.getSize() ); +} + } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/HypreParVector.h b/src/TNL/Containers/HypreParVector.h new file mode 100644 index 0000000000000000000000000000000000000000..20258e3dd9ba409d2c84248568893200ac0daaa9 --- /dev/null +++ b/src/TNL/Containers/HypreParVector.h @@ -0,0 +1,375 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include <TNL/Containers/HypreVector.h> + #include <TNL/Containers/DistributedVector.h> + +namespace TNL { +namespace Containers { + +/** + * \brief Wrapper for Hypre's parallel vector. + * + * Links to upstream sources: + * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/par_vector.h + * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/par_vector.c + * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/_hypre_parcsr_mv.h (catch-all interface) + * + * \ingroup Hypre + */ +class HypreParVector +{ +public: + using RealType = HYPRE_Real; + using ValueType = RealType; + using DeviceType = HYPRE_Device; + using IndexType = HYPRE_Int; + using LocalRangeType = Subrange< IndexType >; + + using VectorType = Containers::DistributedVector< RealType, DeviceType, IndexType >; + using ViewType = typename VectorType::ViewType; + using ConstViewType = typename VectorType::ConstViewType; + + using LocalVectorType = Containers::Vector< RealType, DeviceType, IndexType >; + using LocalViewType = typename LocalVectorType::ViewType; + using ConstLocalViewType = typename LocalVectorType::ConstViewType; + + using SynchronizerType = typename ViewType::SynchronizerType; + + // default constructor, no underlying \e hypre_ParVector is created. + HypreParVector() = default; + + // TODO: behavior should depend on "owns_data" (shallow vs deep copy) + HypreParVector( const HypreParVector& other ) = delete; + + HypreParVector( HypreParVector&& other ) noexcept + : v( other.v ), owns_handle( other.owns_handle ), localData( std::move( other.localData ) ), ghosts( other.ghosts ), + synchronizer( std::move( other.synchronizer ) ), valuesPerElement( other.valuesPerElement ) + { + other.v = nullptr; + } + + // TODO should do a deep copy + HypreParVector& + operator=( const HypreParVector& other ) = delete; + + HypreParVector& + operator=( HypreParVector&& other ) noexcept + { + v = other.v; + other.v = nullptr; + owns_handle = other.owns_handle; + localData = std::move( other.localData ); + ghosts = other.ghosts; + synchronizer = std::move( other.synchronizer ); + valuesPerElement = other.valuesPerElement; + return *this; + } + + //! \brief Initialization by raw data + HypreParVector( const LocalRangeType& localRange, + IndexType ghosts, + IndexType globalSize, + MPI_Comm communicator, + LocalViewType localData ) + { + bind( localRange, ghosts, globalSize, communicator, localData ); + } + + /** + * \brief Convert Hypre's format to HypreParVector + * + * \param take_ownership indicates if the vector should take ownership of + * the handle, i.e. whether to call \e hypre_VectorDestroy when it does + * not need it anymore. + */ + explicit HypreParVector( hypre_ParVector* handle, bool take_ownership = true ) + { + bind( handle, take_ownership ); + } + + //! Typecasting to Hypre's \e hypre_ParVector* (which is equivalent to \e HYPRE_ParVector*) + operator hypre_ParVector*() const + { + return v; + } + + ~HypreParVector() + { + reset(); + } + + LocalRangeType + getLocalRange() const + { + if( v == nullptr ) + return {}; + return { hypre_ParVectorPartitioning( v )[ 0 ], hypre_ParVectorPartitioning( v )[ 1 ] }; + } + + IndexType + getGhosts() const + { + return ghosts; + } + + //! \brief Return the MPI communicator. + MPI_Comm + getCommunicator() const + { + if( v == nullptr ) + return MPI_COMM_NULL; + return hypre_ParVectorComm( v ); + } + + //! \brief Returns the global size of the vector. + HYPRE_Int + getSize() const + { + if( v == nullptr ) + return 0; + return hypre_ParVectorGlobalSize( v ); + } + + ConstLocalViewType + getConstLocalView() const + { + return { localData.getData(), getLocalRange().getSize() }; + } + + LocalViewType + getLocalView() + { + return { localData.getData(), getLocalRange().getSize() }; + } + + ConstLocalViewType + getConstLocalViewWithGhosts() const + { + return localData.getConstView(); + } + + LocalViewType + getLocalViewWithGhosts() + { + return localData.getView(); + } + + ConstViewType + getConstView() const + { + return { getLocalRange(), ghosts, getSize(), getCommunicator(), getConstLocalViewWithGhosts() }; + } + + ViewType + getView() + { + return { getLocalRange(), ghosts, getSize(), getCommunicator(), getLocalViewWithGhosts() }; + } + + /** + * \brief Drop previously set data (deallocate if the vector was the owner) + * and bind to the given data (i.e., the vector does not become the owner). + */ + void + bind( const LocalRangeType& localRange, + IndexType ghosts, + IndexType globalSize, + MPI_Comm communicator, + LocalViewType localData ) + { + TNL_ASSERT_EQ( localData.getSize(), + localRange.getSize() + ghosts, + "The local array size does not match the local range of the distributed array." ); + TNL_ASSERT_GE( ghosts, 0, "The ghosts count must be non-negative." ); + + // drop/deallocate the current data + reset(); + + // create handle for the vector + IndexType partitioning[ 2 ]; + partitioning[ 0 ] = localRange.getBegin(); + partitioning[ 1 ] = localRange.getEnd(); + v = hypre_ParVectorCreate( communicator, globalSize, partitioning ); + + // set view data + this->localData.bind( localData ); + hypre_ParVectorOwnsData( v ) = 0; + hypre_ParVectorLocalVector( v ) = this->localData; + hypre_ParVectorActualLocalSize( v ) = this->localData.getSize(); + + this->ghosts = ghosts; + } + + void + bind( ViewType view ) + { + bind( view.getLocalRange(), view.getGhosts(), view.getSize(), view.getCommunicator(), view.getLocalViewWithGhosts() ); + } + + void + bind( VectorType& vector ) + { + bind( vector.getView() ); + } + + void + bind( HypreParVector& vector ) + { + bind( vector.getLocalRange(), + vector.getGhosts(), + vector.getSize(), + vector.getCommunicator(), + vector.getLocalViewWithGhosts() ); + } + + /** + * \brief Convert Hypre's format to HypreParVector + * + * \param take_ownership indicates if the vector should take ownership of + * the handle, i.e. whether to call \e hypre_VectorDestroy when it does + * not need it anymore. + */ + void + bind( hypre_ParVector* handle, bool take_ownership = true ) + { + // drop/deallocate the current data + if( v != nullptr ) + hypre_ParVectorDestroy( v ); + + // set the handle and ownership flag + v = handle; + owns_handle = take_ownership; + + // set view data + localData.bind( hypre_ParVectorLocalVector( v ) ); + hypre_ParVectorOwnsData( v ) = 0; + hypre_ParVectorLocalVector( v ) = this->localData; + hypre_ParVectorActualLocalSize( v ) = this->localData.getSize(); + + const HYPRE_Int actual_size = hypre_ParVectorActualLocalSize( v ); + const HYPRE_Int local_size = hypre_ParVectorPartitioning( v )[ 1 ] - hypre_ParVectorPartitioning( v )[ 0 ]; + this->ghosts = actual_size - local_size; + } + + //! \brief Reset the vector to empty state. + void + reset() + { + // drop/deallocate the current data + if( owns_handle && v != nullptr ) { + hypre_ParVectorDestroy( v ); + v = nullptr; + } + else + v = nullptr; + owns_handle = true; + + localData.reset(); + ghosts = 0; + synchronizer = nullptr; + valuesPerElement = 1; + } + + void + setDistribution( LocalRangeType localRange, IndexType ghosts, IndexType globalSize, const MPI::Comm& communicator ) + { + TNL_ASSERT_LE( localRange.getEnd(), globalSize, "end of the local range is outside of the global range" ); + + // drop/deallocate the current data + reset(); + + // create handle for the vector + IndexType partitioning[ 2 ]; + partitioning[ 0 ] = localRange.getBegin(); + partitioning[ 1 ] = localRange.getEnd(); + v = hypre_ParVectorCreate( communicator, globalSize, partitioning ); + + // set view data + this->localData.setSize( localRange.getSize() + ghosts ); + hypre_ParVectorOwnsData( v ) = 0; + hypre_ParVectorLocalVector( v ) = this->localData; + hypre_ParVectorActualLocalSize( v ) = this->localData.getSize(); + + this->ghosts = ghosts; + } + + //! \brief Set all elements of the vector to \e value. + void + setValue( RealType value ) + { + if( v != nullptr ) + hypre_ParVectorSetConstantValues( v, value ); + } + + // synchronizer stuff + void + setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer, int valuesPerElement = 1 ) + { + this->synchronizer = std::move( synchronizer ); + this->valuesPerElement = valuesPerElement; + } + + std::shared_ptr< SynchronizerType > + getSynchronizer() const + { + return synchronizer; + } + + int + getValuesPerElement() const + { + return valuesPerElement; + } + + // Note that this method is not thread-safe - only the thread which created + // and "owns" the instance of this object can call this method. + void + startSynchronization() + { + if( ghosts == 0 ) + return; + // TODO: assert does not play very nice with automatic synchronizations from operations like + // assignment of scalars + // (Maybe we should just drop all automatic syncs? But that's not nice for high-level codes + // like linear solvers...) + TNL_ASSERT_TRUE( synchronizer, "the synchronizer was not set" ); + + typename SynchronizerType::ByteArrayView bytes; + bytes.bind( reinterpret_cast< std::uint8_t* >( localData.getData() ), sizeof( ValueType ) * localData.getSize() ); + synchronizer->synchronizeByteArrayAsync( bytes, sizeof( ValueType ) * valuesPerElement ); + } + + void + waitForSynchronization() const + { + if( synchronizer && synchronizer->async_op.valid() ) { + synchronizer->async_wait_timer.start(); + synchronizer->async_op.wait(); + synchronizer->async_wait_timer.stop(); + } + } + +protected: + hypre_ParVector* v = nullptr; + bool owns_handle = true; + HypreVector localData; + HYPRE_Int ghosts = 0; + + std::shared_ptr< SynchronizerType > synchronizer = nullptr; + int valuesPerElement = 1; +}; + +} // namespace Containers +} // namespace TNL + +#endif // HAVE_HYPRE diff --git a/src/TNL/Containers/HypreVector.h b/src/TNL/Containers/HypreVector.h new file mode 100644 index 0000000000000000000000000000000000000000..5fc6a6ebdc92156e0650b959eeae702ab53de590 --- /dev/null +++ b/src/TNL/Containers/HypreVector.h @@ -0,0 +1,265 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include <TNL/Hypre.h> + + #include <TNL/Containers/Vector.h> + +namespace TNL { +namespace Containers { + +/** + * \brief Wrapper for Hypre's sequential vector. + * + * Links to upstream sources: + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/vector.h + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/vector.c + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface) + * + * \ingroup Hypre + */ +class HypreVector +{ +public: + using RealType = HYPRE_Real; + using ValueType = RealType; + using DeviceType = HYPRE_Device; + using IndexType = HYPRE_Int; + + using VectorType = Containers::Vector< RealType, DeviceType, IndexType >; + using ViewType = typename VectorType::ViewType; + using ConstViewType = typename VectorType::ConstViewType; + + HypreVector() = default; + + // TODO: behavior should depend on "owns_data" (shallow vs deep copy) + HypreVector( const HypreVector& other ) = delete; + + HypreVector( HypreVector&& other ) noexcept : v( other.v ), owns_handle( other.owns_handle ) + { + other.v = nullptr; + } + + // TODO should do a deep copy + HypreVector& + operator=( const HypreVector& other ) = delete; + + HypreVector& + operator=( HypreVector&& other ) noexcept + { + v = other.v; + other.v = nullptr; + owns_handle = other.owns_handle; + return *this; + } + + HypreVector( RealType* data, IndexType size ) + { + bind( data, size ); + } + + HypreVector( ViewType view ) + { + bind( view ); + } + + /** + * \brief Convert Hypre's format to HypreVector + * + * \param take_ownership indicates if the vector should take ownership of + * the handle, i.e. whether to call \e hypre_VectorDestroy when it does + * not need it anymore. + */ + explicit HypreVector( hypre_Vector* handle, bool take_ownership = true ) + { + bind( handle, take_ownership ); + } + + operator const hypre_Vector*() const noexcept + { + return v; + } + + operator hypre_Vector*() noexcept + { + return v; + } + + // HYPRE_Vector is "equivalent" to pointer to hypre_Vector, but requires + // ugly C-style cast on the pointer (which is done even in Hypre itself) + // https://github.com/hypre-space/hypre/blob/master/src/seq_mv/HYPRE_vector.c + operator HYPRE_Vector() const noexcept + { + return (HYPRE_Vector) v; + } + + ~HypreVector() + { + reset(); + } + + const RealType* + getData() const noexcept + { + if( v == nullptr ) + return nullptr; + return hypre_VectorData( v ); + } + + RealType* + getData() noexcept + { + if( v == nullptr ) + return nullptr; + return hypre_VectorData( v ); + } + + IndexType + getSize() const + { + if( v == nullptr ) + return 0; + return hypre_VectorSize( v ); + } + + ConstViewType + getConstView() const + { + if( v == nullptr ) + return {}; + return { getData(), getSize() }; + } + + ViewType + getView() + { + if( v == nullptr ) + return {}; + return { getData(), getSize() }; + } + + /** + * \brief Drop previously set data (deallocate if the vector was the owner) + * and bind to the given data (i.e., the vector does not become the owner). + */ + void + bind( RealType* data, IndexType size ) + { + // drop/deallocate the current data + reset(); + + // create handle for the vector + v = hypre_SeqVectorCreate( 0 ); + + // set view data + hypre_VectorOwnsData( v ) = 0; + hypre_VectorData( v ) = data; + hypre_VectorSize( v ) = size; + // TODO: v->memory_location + } + + void + bind( ViewType view ) + { + bind( view.getData(), view.getSize() ); + } + + void + bind( VectorType& vector ) + { + bind( vector.getView() ); + } + + void + bind( HypreVector& vector ) + { + bind( vector.getData(), vector.getSize() ); + } + + /** + * \brief Convert Hypre's format to HypreSeqVector + * + * \param take_ownership indicates if the vector should take ownership of + * the handle, i.e. whether to call \e hypre_VectorDestroy when it does + * not need it anymore. + */ + void + bind( hypre_Vector* handle, bool take_ownership = true ) + { + // drop/deallocate the current data + reset(); + + // set the handle and ownership flag + v = handle; + owns_handle = take_ownership; + } + + //! \brief Reset the vector to empty state. + void + reset() + { + if( owns_handle && v != nullptr ) { + hypre_SeqVectorDestroy( v ); + v = nullptr; + } + else + v = nullptr; + owns_handle = true; + } + + /** + * \brief Set the new vector size. + * + * - if the vector previously owned data, they are deallocated + * - new size is set + * - the vector is initialized with \e hypre_SeqVectorInitialize + * (i.e., data are allocated) + */ + void + setSize( IndexType size ) + { + hypre_SeqVectorDestroy( v ); + v = hypre_SeqVectorCreate( size ); + hypre_SeqVectorInitialize( v ); + } + + //! \brief Equivalent to \ref setSize. + void + resize( IndexType size ) + { + setSize( size ); + } + + //! \brief Equivalent to \ref setSize followed by \ref setValue. + void + resize( IndexType size, RealType value ) + { + setSize( size ); + setValue( value ); + } + + //! \brief Set all elements of the vector to \e value. + void + setValue( RealType value ) + { + hypre_SeqVectorSetConstantValues( v, value ); + } + +protected: + hypre_Vector* v = nullptr; + bool owns_handle = true; +}; + +} // namespace Containers +} // namespace TNL + +#endif // HAVE_HYPRE diff --git a/src/TNL/Containers/Partitioner.h b/src/TNL/Containers/Partitioner.h index e79a6d770572dc56579354bd7c4789332d20d129..3b121b243f1251763dad1cb403ac65da4a3d7d4f 100644 --- a/src/TNL/Containers/Partitioner.h +++ b/src/TNL/Containers/Partitioner.h @@ -29,28 +29,22 @@ public: static SubrangeType splitRange( Index globalSize, const MPI::Comm& communicator ) { - if( communicator != MPI_COMM_NULL ) { - const int rank = communicator.rank(); - const int partitions = communicator.size(); - const Index begin = TNL::min( globalSize, rank * globalSize / partitions ); - const Index end = TNL::min( globalSize, ( rank + 1 ) * globalSize / partitions ); - return SubrangeType( begin, end ); + if( communicator == MPI_COMM_NULL ) + return { 0, 0 }; + + const int rank = communicator.rank(); + const int partitions = communicator.size(); + + const Index partSize = globalSize / partitions; + const int remainder = globalSize % partitions; + if( rank < remainder ) { + const Index begin = rank * ( partSize + 1 ); + const Index end = begin + partSize + 1; + return { begin, end }; } - else - return SubrangeType( 0, 0 ); - } - - // Gets the owner of given global index. - __cuda_callable__ - static int - getOwner( Index i, Index globalSize, int partitions ) - { - int owner = i * partitions / globalSize; - if( owner < partitions - 1 && i >= getOffset( globalSize, owner + 1, partitions ) ) - owner++; - TNL_ASSERT_GE( i, getOffset( globalSize, owner, partitions ), "BUG in getOwner" ); - TNL_ASSERT_LT( i, getOffset( globalSize, owner + 1, partitions ), "BUG in getOwner" ); - return owner; + const Index begin = remainder * ( partSize + 1 ) + ( rank - remainder ) * partSize; + const Index end = begin + partSize; + return { begin, end }; } // Gets the offset of data for given rank. @@ -58,7 +52,11 @@ public: static Index getOffset( Index globalSize, int rank, int partitions ) { - return rank * globalSize / partitions; + const Index partSize = globalSize / partitions; + const int remainder = globalSize % partitions; + if( rank < remainder ) + return rank * ( partSize + 1 ); + return remainder * ( partSize + 1 ) + ( rank - remainder ) * partSize; } // Gets the size of data assigned to given rank. @@ -66,9 +64,11 @@ public: static Index getSizeForRank( Index globalSize, int rank, int partitions ) { - const Index begin = min( globalSize, rank * globalSize / partitions ); - const Index end = min( globalSize, ( rank + 1 ) * globalSize / partitions ); - return end - begin; + const Index partSize = globalSize / partitions; + const int remainder = globalSize % partitions; + if( rank < remainder ) + return partSize + 1; + return partSize; } template< typename Device > diff --git a/src/TNL/Containers/detail/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h index a97caf6d1b8e0dc62cf6a7a9c09f8d0f090519dc..31f46c47c9d09387e21c612a2e886562b993594f 100644 --- a/src/TNL/Containers/detail/ArrayIO.h +++ b/src/TNL/Containers/detail/ArrayIO.h @@ -26,12 +26,11 @@ struct ArrayIO< Value, Index, Allocator, true > static std::string getSerializationType() { - return "Containers::Array< " + TNL::getSerializationType< Value >() + ", [any_device], " - + TNL::getSerializationType< Index >() + ", [any_allocator] >"; + return "TNL::Containers::Array< " + TNL::getSerializationType< Value >() + " >"; } static void - save( File& file, const Value* data, const Index elements ) + save( File& file, const Value* data, Index elements ) { Index i; try { @@ -46,7 +45,7 @@ struct ArrayIO< Value, Index, Allocator, true > } static void - load( File& file, Value* data, const Index elements ) + load( File& file, Value* data, Index elements ) { Index i = 0; try { @@ -59,6 +58,31 @@ struct ArrayIO< Value, Index, Allocator, true > + std::to_string( elements ) + " from the file." ); } } + + static void + loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size ) + { + if( std::size_t( offset + size ) > elementsInFile ) + throw Exceptions::FileDeserializationError( + file.getFileName(), "unable to read subrange of array elements: offset + size > elementsInFile" ); + + if( size == 0 ) { + file.ignore< Value >( elementsInFile ); + return; + } + + try { + file.ignore< Value >( offset ); + load( file, data, size ); + file.ignore< Value >( elementsInFile - offset - size ); + } + catch( ... ) { + throw Exceptions::FileDeserializationError( file.getFileName(), + "unable to read array elements in the subrange [" + + std::to_string( offset ) + ", " + std::to_string( offset + size ) + + ") from the file." ); + } + } }; template< typename Value, typename Index, typename Allocator > @@ -67,17 +91,17 @@ struct ArrayIO< Value, Index, Allocator, false > static std::string getSerializationType() { - return "Containers::Array< " + TNL::getSerializationType< Value >() + ", [any_device], " - + TNL::getSerializationType< Index >() + ", [any_allocator] >"; + return "TNL::Containers::Array< " + TNL::getSerializationType< Value >() + " >"; } + template< typename TargetValue = Value > static void - save( File& file, const Value* data, const Index elements ) + save( File& file, const Value* data, Index elements ) { if( elements == 0 ) return; try { - file.save< Value, Value, Allocator >( data, elements ); + file.save< Value, TargetValue, Allocator >( data, elements ); } catch( ... ) { throw Exceptions::FileSerializationError( @@ -86,18 +110,143 @@ struct ArrayIO< Value, Index, Allocator, false > } static void - load( File& file, Value* data, const Index elements ) + save( File& file, const Value* data, Index elements, const std::string& typeInFile ) + { + if( typeInFile == getType< Value >() ) + save< Value >( file, data, elements ); + // check fundamental types for type casting + else if( typeInFile == getType< std::int8_t >() ) + save< std::int8_t >( file, data, elements ); + else if( typeInFile == getType< std::uint8_t >() ) + save< std::uint8_t >( file, data, elements ); + else if( typeInFile == getType< std::int16_t >() ) + save< std::int16_t >( file, data, elements ); + else if( typeInFile == getType< std::uint16_t >() ) + save< std::uint16_t >( file, data, elements ); + else if( typeInFile == getType< std::int32_t >() ) + save< std::int32_t >( file, data, elements ); + else if( typeInFile == getType< std::uint32_t >() ) + save< std::uint32_t >( file, data, elements ); + else if( typeInFile == getType< std::int64_t >() ) + save< std::int64_t >( file, data, elements ); + else if( typeInFile == getType< std::uint64_t >() ) + save< std::uint64_t >( file, data, elements ); + else if( typeInFile == getType< float >() ) + save< float >( file, data, elements ); + else if( typeInFile == getType< double >() ) + save< double >( file, data, elements ); + else + throw Exceptions::FileSerializationError( + file.getFileName(), + "value type " + getType< Value >() + " cannot be type-cast to the requested type " + std::string( typeInFile ) ); + } + + template< typename SourceValue = Value > + static void + load( File& file, Value* data, Index elements ) { if( elements == 0 ) return; try { - file.load< Value, Value, Allocator >( data, elements ); + file.load< Value, SourceValue, Allocator >( data, elements ); } catch( ... ) { throw Exceptions::FileDeserializationError( file.getFileName(), "unable to read " + std::to_string( elements ) + " array elements from the file." ); } } + + static void + load( File& file, Value* data, Index elements, const std::string& typeInFile ) + { + if( typeInFile == getType< Value >() ) + load( file, data, elements ); + // check fundamental types for type casting + else if( typeInFile == getType< std::int8_t >() ) + load< std::int8_t >( file, data, elements ); + else if( typeInFile == getType< std::uint8_t >() ) + load< std::uint8_t >( file, data, elements ); + else if( typeInFile == getType< std::int16_t >() ) + load< std::int16_t >( file, data, elements ); + else if( typeInFile == getType< std::uint16_t >() ) + load< std::uint16_t >( file, data, elements ); + else if( typeInFile == getType< std::int32_t >() ) + load< std::int32_t >( file, data, elements ); + else if( typeInFile == getType< std::uint32_t >() ) + load< std::uint32_t >( file, data, elements ); + else if( typeInFile == getType< std::int64_t >() ) + load< std::int64_t >( file, data, elements ); + else if( typeInFile == getType< std::uint64_t >() ) + load< std::uint64_t >( file, data, elements ); + else if( typeInFile == getType< float >() ) + load< float >( file, data, elements ); + else if( typeInFile == getType< double >() ) + load< double >( file, data, elements ); + else + throw Exceptions::FileDeserializationError( file.getFileName(), + "value type " + std::string( typeInFile ) + + " cannot be type-cast to the requested type " + getType< Value >() ); + } + + template< typename SourceValue = Value > + static void + loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size ) + { + if( std::size_t( offset + size ) > elementsInFile ) + throw Exceptions::FileDeserializationError( + file.getFileName(), + "unable to read subrange of array elements: offset + size > elementsInFile: " + std::to_string( offset ) + " + " + + std::to_string( size ) + " > " + std::to_string( elementsInFile ) ); + + if( size == 0 ) { + file.ignore< SourceValue >( elementsInFile ); + return; + } + + try { + file.ignore< SourceValue >( offset ); + load< SourceValue >( file, data, size ); + file.ignore< SourceValue >( elementsInFile - offset - size ); + } + catch( ... ) { + throw Exceptions::FileDeserializationError( file.getFileName(), + "unable to read array elements in the subrange [" + + std::to_string( offset ) + ", " + std::to_string( offset + size ) + + ") from the file." ); + } + } + + static void + loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size, const std::string& typeInFile ) + { + if( typeInFile == getType< Value >() ) + loadSubrange( file, elementsInFile, offset, data, size ); + // check fundamental types for type casting + else if( typeInFile == getType< std::int8_t >() ) + loadSubrange< std::int8_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::uint8_t >() ) + loadSubrange< std::uint8_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::int16_t >() ) + loadSubrange< std::int16_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::uint16_t >() ) + loadSubrange< std::uint16_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::int32_t >() ) + loadSubrange< std::int32_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::uint32_t >() ) + loadSubrange< std::uint32_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::int64_t >() ) + loadSubrange< std::int64_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< std::uint64_t >() ) + loadSubrange< std::uint64_t >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< float >() ) + loadSubrange< float >( file, elementsInFile, offset, data, size ); + else if( typeInFile == getType< double >() ) + loadSubrange< double >( file, elementsInFile, offset, data, size ); + else + throw Exceptions::FileDeserializationError( file.getFileName(), + "value type " + std::string( typeInFile ) + + " cannot be type-cast to the requested type " + getType< Value >() ); + } }; } // namespace detail diff --git a/src/TNL/File.h b/src/TNL/File.h index 92dfd58e4edaa89b63a368e4989b48f62997e37b..7449a15921f88774bdf350f8e28518cbfe8a7675 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -140,6 +140,18 @@ public: void save( const Type* buffer, std::streamsize elements = 1 ); + /** + * \brief Extracts and discards characters from the file. + * + * Throws \ref std::ios_base::failure on failure. + * + * \tparam SourceType type of data stored on the file, + * \param elements number of elements to be read and ignored. + */ + template< typename SourceType > + void + ignore( std::streamsize elements = 1 ); + protected: // implementation for all allocators which allocate data accessible from host template< typename Type, diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index 0d10985f2a8ef7803be919d6f79c7b20bd40c179..28dc560cf9d739e2877e5dbe55aec2a8e52a5e31 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -103,7 +103,6 @@ File::load_impl( Type* buffer, std::streamsize elements ) file.read( reinterpret_cast< char* >( cast_buffer.get() ), sizeof( SourceType ) * transfer ); for( std::streamsize i = 0; i < transfer; i++ ) buffer[ readElements++ ] = static_cast< Type >( cast_buffer[ i ] ); - readElements += transfer; } } } @@ -184,7 +183,6 @@ File::save_impl( const Type* buffer, std::streamsize elements ) for( std::streamsize i = 0; i < transfer; i++ ) cast_buffer[ i ] = static_cast< TargetType >( buffer[ writtenElements++ ] ); file.write( reinterpret_cast< char* >( cast_buffer.get() ), sizeof( TargetType ) * transfer ); - writtenElements += transfer; } } } @@ -234,6 +232,15 @@ File::save_impl( const Type* buffer, std::streamsize elements ) #endif } +template< typename SourceType > +void +File::ignore( std::streamsize elements ) +{ + // use seekg instead of ignore for efficiency + // https://stackoverflow.com/a/31246560 + file.seekg( sizeof( SourceType ) * elements, std::ios_base::cur ); +} + inline bool fileExists( const String& fileName ) { diff --git a/src/TNL/Hypre.h b/src/TNL/Hypre.h new file mode 100644 index 0000000000000000000000000000000000000000..591b82fcc168a2788cd138e4bea9681cff99eb54 --- /dev/null +++ b/src/TNL/Hypre.h @@ -0,0 +1,130 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include <mpi.h> + + // Hypre header files + #include <seq_mv.h> + #include <_hypre_parcsr_mv.h> + #include <_hypre_parcsr_ls.h> + + #ifdef HYPRE_MIXEDINT + #error "TNL does not work with HYPRE's mixed-int support (i.e. when HYPRE_Int and HYPRE_BigInt are different types)" + #endif + #ifdef HYPRE_COMPLEX + #error "TNL does not work with HYPRE's complex numbers support" + #endif + + #if defined( HYPRE_USING_GPU ) && ! ( defined( HYPRE_USING_CUDA ) || defined( HYPRE_USING_HIP ) ) + #error "Unsupported GPU build of HYPRE! Only CUDA and HIP builds are supported." + #endif + #if defined( HYPRE_USING_CUDA ) && ! defined( HAVE_CUDA ) + #error "HAVE_CUDA is required when HYPRE is built with CUDA!" + #endif + #if defined( HYPRE_USING_HIP ) && ! defined( HAVE_HIP ) + #error "HAVE_HIP is required when HYPRE is built with HIP!" + #endif + +namespace TNL { + +/** + * \defgroup Hypre Wrappers for the Hypre library + * + * This group includes various wrapper classes for data structures and + * algorithms implemented in the [Hypre library][hypre]. See the + * [example][example] for how these wrappers can be used. + * + * [hypre]: https://github.com/hypre-space/hypre + * [example]: https://mmg-gitlab.fjfi.cvut.cz/gitlab/tnl/tnl-dev/-/blob/develop/src/Examples/Hypre/tnl-hypre-ex5.cpp + * + * @{ + */ + +/** + * \brief A simple RAII wrapper for Hypre's initialization and finalization. + * + * When the object is constructed, it calls \e HYPRE_Init() and sets some + * GPU-relevant options. The \e HYPRE_Finalize() function is called + * automatically from the object's destructor. + */ +struct Hypre +{ + //! \brief Constructor initializes Hypre by calling \e HYPRE_Init() and set default options. + Hypre() + { + HYPRE_Init(); + + setDefaultOptions(); + } + + //! \brief Sets the default Hypre global options (mostly GPU-relevant). + void + setDefaultOptions() + { + // Global Hypre options, see + // https://hypre.readthedocs.io/en/latest/solvers-boomeramg.html#gpu-supported-options + + #ifdef HYPRE_USING_CUDA + // Use hypre's SpGEMM instead of cuSPARSE for performance reasons + HYPRE_SetSpGemmUseCusparse( 0 ); + #elif defined( HYPRE_USING_HIP ) + // Use rocSPARSE instead of hypre's SpGEMM for performance reasons (default) + HYPRE_SetSpGemmUseCusparse( 1 ); + #endif + + // The following options are Hypre's defaults as of version 2.24 + + // Allocate Hypre objects in GPU memory (default) + // HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE); + + // Where to execute when using UVM (default) + // HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE); + + // Use GPU-based random number generator (default) + // HYPRE_SetUseGpuRand(1); + } + + //! \brief Destructor that finalizes Hypre when the object goes out of scope. + ~Hypre() + { + HYPRE_Finalize(); + } +}; + +} // namespace TNL + +// clang-format off + #ifdef HYPRE_USING_CUDA + #include <TNL/Devices/Cuda.h> + namespace TNL { + using HYPRE_Device = Devices::Cuda; + } + #else + #include <TNL/Devices/Host.h> + namespace TNL { + /** + * \brief The \ref TNL::Devices "device" compatible with Hypre's data + * structures. + * + * The type depends on how the Hypre library was configured. By + * default, it is \ref Devices::Host. When using Hypre built with CUDA + * support, it is \ref Devices::Cuda. + */ + using HYPRE_Device = Devices::Host; + } + #endif +// clang-format on + +// this is a Doxygen end-group marker +//! @} + +#endif // HAVE_HYPRE diff --git a/src/TNL/Matrices/HypreCSRMatrix.h b/src/TNL/Matrices/HypreCSRMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..0a9590ba453e259b0570819b664437acab850dea --- /dev/null +++ b/src/TNL/Matrices/HypreCSRMatrix.h @@ -0,0 +1,410 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include <TNL/Hypre.h> + + #include <TNL/Matrices/SparseMatrix.h> + +namespace TNL { +namespace Matrices { + +/** + * \brief Wrapper for Hypre's sequential CSR matrix. + * + * Links to upstream sources: + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.h + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.c + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface) + * + * \ingroup Hypre + */ +class HypreCSRMatrix +{ +public: + using RealType = HYPRE_Real; + using ValueType = RealType; + using DeviceType = HYPRE_Device; + using IndexType = HYPRE_Int; + + using MatrixType = SparseMatrix< RealType, DeviceType, IndexType, GeneralMatrix, Algorithms::Segments::CSRDefault >; + using ViewType = typename MatrixType::ViewType; + using ConstViewType = typename MatrixType::ConstViewType; + + using ValuesViewType = Containers::VectorView< RealType, DeviceType, IndexType >; + using ConstValuesViewType = typename ValuesViewType::ConstViewType; + using ColumnIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType >; + using ColumnIndexesViewType = typename ColumnIndexesVectorType::ViewType; + using ConstColumnIndexesViewType = typename ColumnIndexesVectorType::ConstViewType; + using SegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, IndexType >; + using ConstSegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, std::add_const_t< IndexType > >; + + HypreCSRMatrix() = default; + + // TODO: behavior should depend on "owns_data" (shallow vs deep copy) + HypreCSRMatrix( const HypreCSRMatrix& other ) = delete; + + HypreCSRMatrix( HypreCSRMatrix&& other ) noexcept : m( other.m ), owns_handle( other.owns_handle ) + { + other.m = nullptr; + } + + // TODO should do a deep copy + HypreCSRMatrix& + operator=( const HypreCSRMatrix& other ) = delete; + + HypreCSRMatrix& + operator=( HypreCSRMatrix&& other ) noexcept + { + m = other.m; + other.m = nullptr; + owns_handle = other.owns_handle; + return *this; + } + + HypreCSRMatrix( IndexType rows, + IndexType columns, + ValuesViewType values, + ColumnIndexesViewType columnIndexes, + ColumnIndexesViewType rowOffsets ) + { + bind( rows, columns, std::move( values ), std::move( columnIndexes ), std::move( rowOffsets ) ); + } + + HypreCSRMatrix( ViewType view ) + { + bind( std::move( view ) ); + } + + /** + * \brief Convert Hypre's format to \e HypreCSRMatrix + * + * \param take_ownership indicates if the matrix should take ownership of + * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does + * not need it anymore. + */ + explicit HypreCSRMatrix( hypre_CSRMatrix* handle, bool take_ownership = true ) + { + bind( handle, take_ownership ); + } + + operator const hypre_CSRMatrix*() const noexcept + { + return m; + } + + operator hypre_CSRMatrix*() noexcept + { + return m; + } + + // HYPRE_CSRMatrix is "equivalent" to pointer to hypre_CSRMatrix, but requires + // ugly C-style cast on the pointer (which is done even in Hypre itself) + // https://github.com/hypre-space/hypre/blob/master/src/seq_mv/HYPRE_csr_matrix.c + operator HYPRE_CSRMatrix() noexcept + { + return (HYPRE_CSRMatrix) m; + } + + ~HypreCSRMatrix() + { + reset(); + } + + IndexType + getRows() const + { + if( m == nullptr ) + return 0; + return hypre_CSRMatrixNumRows( m ); + } + + IndexType + getColumns() const + { + if( m == nullptr ) + return 0; + return hypre_CSRMatrixNumCols( m ); + } + + IndexType + getNonzeroElementsCount() const + { + if( m == nullptr ) + return 0; + return hypre_CSRMatrixNumNonzeros( m ); + } + + ConstValuesViewType + getValues() const + { + if( m == nullptr ) + return {}; + return { hypre_CSRMatrixData( m ), hypre_CSRMatrixNumNonzeros( m ) }; + } + + ValuesViewType + getValues() + { + if( m == nullptr ) + return {}; + return { hypre_CSRMatrixData( m ), hypre_CSRMatrixNumNonzeros( m ) }; + } + + ConstColumnIndexesViewType + getColumnIndexes() const + { + if( m == nullptr ) + return {}; + static_assert( std::is_same< HYPRE_Int, HYPRE_BigInt >::value, + "The J array cannot be accessed via this method when HYPRE_Int and HYPRE_BigInt are different types." ); + if( hypre_CSRMatrixBigJ( m ) != nullptr ) + return { hypre_CSRMatrixBigJ( m ), hypre_CSRMatrixNumNonzeros( m ) }; + return { hypre_CSRMatrixJ( m ), hypre_CSRMatrixNumNonzeros( m ) }; + } + + ColumnIndexesViewType + getColumnIndexes() + { + if( m == nullptr ) + return {}; + static_assert( std::is_same< HYPRE_Int, HYPRE_BigInt >::value, + "The J array cannot be accessed via this method when HYPRE_Int and HYPRE_BigInt are different types." ); + if( hypre_CSRMatrixBigJ( m ) != nullptr ) + return { hypre_CSRMatrixBigJ( m ), hypre_CSRMatrixNumNonzeros( m ) }; + return { hypre_CSRMatrixJ( m ), hypre_CSRMatrixNumNonzeros( m ) }; + } + + ConstColumnIndexesViewType + getRowOffsets() const + { + if( m == nullptr ) + return {}; + if( hypre_CSRMatrixI( m ) == nullptr ) + return {}; + return { hypre_CSRMatrixI( m ), hypre_CSRMatrixNumRows( m ) + 1 }; + } + + ColumnIndexesViewType + getRowOffsets() + { + if( m == nullptr ) + return {}; + if( hypre_CSRMatrixI( m ) == nullptr ) + return {}; + return { hypre_CSRMatrixI( m ), hypre_CSRMatrixNumRows( m ) + 1 }; + } + + ConstSegmentsViewType + getSegments() const + { + if( m == nullptr ) + return {}; + return { getRowOffsets(), typename ConstSegmentsViewType::KernelType{} }; + } + + SegmentsViewType + getSegments() + { + if( m == nullptr ) + return {}; + return { getRowOffsets(), typename SegmentsViewType::KernelType{} }; + } + + ConstViewType + getConstView() const + { + // FIXME: remove const_cast + return { getRows(), getColumns(), getValues(), getColumnIndexes(), const_cast< HypreCSRMatrix* >( this )->getSegments() }; + } + + ViewType + getView() + { + return { getRows(), getColumns(), getValues(), getColumnIndexes(), getSegments() }; + } + + /** + * \brief Drop previously set data (deallocate if the matrix was the owner) + * and bind to the given data (i.e., the matrix does not become the owner). + */ + void + bind( IndexType rows, + IndexType columns, + ValuesViewType values, + ColumnIndexesViewType columnIndexes, + ColumnIndexesViewType rowOffsets ) + { + TNL_ASSERT_EQ( rowOffsets.getSize(), rows + 1, "wrong size of rowOffsets" ); + TNL_ASSERT_EQ( values.getSize(), rowOffsets.getElement( rows ), "wrong size of values" ); + TNL_ASSERT_EQ( columnIndexes.getSize(), rowOffsets.getElement( rows ), "wrong size of columnIndexes" ); + + // drop/deallocate the current data + reset(); + + // create handle for the matrix + m = hypre_CSRMatrixCreate( rows, columns, rowOffsets.getElement( rows ) ); + + // set view data + hypre_CSRMatrixOwnsData( m ) = 0; + hypre_CSRMatrixData( m ) = values.getData(); + hypre_CSRMatrixJ( m ) = columnIndexes.getData(); + hypre_CSRMatrixI( m ) = rowOffsets.getData(); + // TODO: v->memory_location + } + + void + bind( ViewType view ) + { + bind( view.getRows(), view.getColumns(), view.getValues(), view.getColumnIndexes(), view.getSegments().getOffsets() ); + } + + void + bind( MatrixType& matrix ) + { + bind( matrix.getView() ); + } + + void + bind( HypreCSRMatrix& matrix ) + { + bind( matrix.getRows(), matrix.getColumns(), matrix.getValues(), matrix.getColumnIndexes(), matrix.getRowOffsets() ); + } + + /** + * \brief Convert Hypre's format to \e HypreCSRMatrix + * + * \param take_ownership indicates if the matrix should take ownership of + * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does + * not need it anymore. + */ + void + bind( hypre_CSRMatrix* handle, bool take_ownership = true ) + { + // drop/deallocate the current data + reset(); + + // set the handle and ownership flag + m = handle; + owns_handle = take_ownership; + } + + //! \brief Reset the matrix to empty state. + void + reset() + { + if( owns_handle && m != nullptr ) { + // FIXME: workaround for https://github.com/hypre-space/hypre/issues/621 + if( ! hypre_CSRMatrixOwnsData( m ) ) + // prevent deallocation of the "I" array when the handle does not own it + hypre_CSRMatrixI( m ) = nullptr; + + hypre_CSRMatrixDestroy( m ); + m = nullptr; + } + else + m = nullptr; + owns_handle = true; + } + + /** + * \brief Set the new matrix dimensions. + * + * - if the matrix previously owned data, they are deallocated + * - new size is set + * - the matrix is initialized with \e hypre_CSRMatrixInitialize + * (i.e., data are allocated) + */ + void + setDimensions( IndexType rows, IndexType cols ) + { + reset(); + m = hypre_CSRMatrixCreate( rows, cols, 0 ); + hypre_CSRMatrixInitialize( m ); + } + + template< typename RowsCapacitiesVector > + void + setRowCapacities( const RowsCapacitiesVector& rowCapacities ) + { + TNL_ASSERT_EQ( + rowCapacities.getSize(), this->getRows(), "Number of matrix rows does match the rowCapacities vector size." ); + + const IndexType nonzeros = TNL::sum( rowCapacities ); + hypre_CSRMatrixResize( m, getRows(), getColumns(), nonzeros ); + + // initialize row pointers + auto rowOffsets = getRowOffsets(); + TNL_ASSERT_EQ( + rowOffsets.getSize(), rowCapacities.getSize() + 1, "The size of the rowOffsets vector does not match rowCapacities" ); + // GOTCHA: when rowCapacities.getSize() == 0, getView returns a full view with size == 1 + if( rowCapacities.getSize() > 0 ) { + auto view = rowOffsets.getView( 0, rowCapacities.getSize() ); + view = rowCapacities; + } + rowOffsets.setElement( rowCapacities.getSize(), 0 ); + Algorithms::inplaceExclusiveScan( rowOffsets ); + + // reset column indices with the padding index + getColumnIndexes().setValue( -1 ); + } + + /** + * \brief Reorders the column and data arrays of a square matrix, such that + * the first entry in each row is the diagonal one. + * + * Note that the \e hypre_CSRMatrixReorder function only swaps the diagonal + * and first entries in the row, but this function also shifts the + * subdiagonal entries to ensure that the column indices (except for the + * diagonal one) remain in the original order. + */ + void + reorderDiagonalEntries() + { + // operation does not make sense for non-square matrices + if( getRows() != getColumns() ) + return; + + getView().forAllRows( + [] __cuda_callable__( typename ViewType::RowView & row ) mutable + { + const IndexType j_diag = row.getRowIndex(); + IndexType c_diag = 0; + RealType v_diag; + + // find the diagonal entry + for( IndexType c = 0; c < row.getSize(); c++ ) + if( row.getColumnIndex( c ) == j_diag ) { + c_diag = c; + v_diag = row.getValue( c ); + break; + } + + if( c_diag > 0 ) { + // shift the subdiagonal elements + for( IndexType c = c_diag; c > 0; c-- ) + row.setElement( c, row.getColumnIndex( c - 1 ), row.getValue( c - 1 ) ); + + // write the diagonal entry to the first position + row.setElement( 0, j_diag, v_diag ); + } + } ); + } + +protected: + hypre_CSRMatrix* m = nullptr; + bool owns_handle = true; +}; + +} // namespace Matrices +} // namespace TNL + +#endif // HAVE_HYPRE diff --git a/src/TNL/Matrices/HypreParCSRMatrix.h b/src/TNL/Matrices/HypreParCSRMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..b409e8d7c5e6ab2b58d8cb4edef527aada8edfdc --- /dev/null +++ b/src/TNL/Matrices/HypreParCSRMatrix.h @@ -0,0 +1,410 @@ +#pragma once + +#ifdef HAVE_HYPRE + + #include <memory> // std::unique_ptr + + #include <TNL/MPI/Comm.h> + #include <TNL/MPI/Wrappers.h> + #include <TNL/Containers/Subrange.h> + #include <TNL/Containers/HypreVector.h> + #include <TNL/Matrices/HypreCSRMatrix.h> + +namespace TNL { +namespace Matrices { + +/** + * \brief Wrapper for Hypre's sequential CSR matrix. + * + * Links to upstream sources: + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.h + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.c + * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface) + * + * \ingroup Hypre + */ +class HypreParCSRMatrix +{ +public: + using RealType = HYPRE_Real; + using ValueType = RealType; + using DeviceType = HYPRE_Device; + using IndexType = HYPRE_Int; + using LocalRangeType = Containers::Subrange< IndexType >; + + using MatrixType = HypreCSRMatrix; + + using ValuesViewType = Containers::VectorView< RealType, DeviceType, IndexType >; + using ConstValuesViewType = typename ValuesViewType::ConstViewType; + using ColumnIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType >; + using ColumnIndexesViewType = typename ColumnIndexesVectorType::ViewType; + using ConstColumnIndexesViewType = typename ColumnIndexesVectorType::ConstViewType; + using SegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, IndexType >; + using ConstSegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, std::add_const_t< IndexType > >; + + HypreParCSRMatrix() = default; + + // TODO: behavior should depend on "owns_data" (shallow vs deep copy) + HypreParCSRMatrix( const HypreParCSRMatrix& other ) = delete; + + HypreParCSRMatrix( HypreParCSRMatrix&& other ) noexcept : m( other.m ) + { + other.m = nullptr; + } + + // TODO should do a deep copy + HypreParCSRMatrix& + operator=( const HypreParCSRMatrix& other ) = delete; + + HypreParCSRMatrix& + operator=( HypreParCSRMatrix&& other ) noexcept + { + m = other.m; + other.m = nullptr; + return *this; + } + + /** + * \brief Convert Hypre's format to \e HypreParCSRMatrix + * + * \param take_ownership indicates if the matrix should take ownership of + * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does + * not need it anymore. + */ + explicit HypreParCSRMatrix( hypre_ParCSRMatrix* handle, bool take_ownership = true ) + { + bind( handle, take_ownership ); + } + + /** + * \brief Wrap a global \e hypre_CSRMatrix into \e HypreParCSRMatrix + * + * Each rank will get its own independent \e HypreParCSRMatrix with the + * `MPI_COMM_SELF` communicator. The data is not copied, but + * \e HypreParCSRMatrix keeps a non-owning reference. + */ + static HypreParCSRMatrix + wrapCSRMatrix( hypre_CSRMatrix* matrix ) + { + const IndexType global_num_rows = hypre_CSRMatrixNumRows( matrix ); + const IndexType global_num_cols = hypre_CSRMatrixNumCols( matrix ); + IndexType row_starts[ 2 ]; + row_starts[ 0 ] = 0; + row_starts[ 1 ] = global_num_rows; + IndexType col_starts[ 2 ]; + col_starts[ 0 ] = 0; + col_starts[ 1 ] = global_num_cols; + + // create the handle + hypre_ParCSRMatrix* A = + hypre_ParCSRMatrixCreate( MPI_COMM_SELF, global_num_rows, global_num_cols, row_starts, col_starts, 0, 0, 0 ); + + // bind the diag block + hypre_CSRMatrixDestroy( hypre_ParCSRMatrixDiag( A ) ); + hypre_ParCSRMatrixDiag( A ) = matrix; + + // make sure that the first entry in each row of a square diagonal block is the diagonal element + HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false ); + diag_view.reorderDiagonalEntries(); + + // set additional attributes + hypre_ParCSRMatrixNumNonzeros( A ) = hypre_CSRMatrixNumNonzeros( matrix ); + + // initialize auxiliary substructures + hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) ); + hypre_MatvecCommPkgCreate( A ); + + // set the diag owner flag + HypreParCSRMatrix result( A ); + result.owns_diag = false; + return result; + } + + /** + * \brief Constructs a \e ParCSRMatrix distributed across the processors in + * \e communicator from a \e CSRMatrix on rank 0. + * + * \param global_row_starts Array of `nproc + 1` elements, where `nproc` is + * the number of ranks in the \e communicator. + * \param global_col_starts Array of `nproc + 1` elements, where `nproc` is + * the number of ranks in the \e communicator. + */ + static HypreParCSRMatrix + fromMasterRank( MPI_Comm communicator, IndexType* global_row_starts, IndexType* global_col_starts, hypre_CSRMatrix* matrix ) + { + TNL_ASSERT_TRUE( global_row_starts, "invalid input" ); + TNL_ASSERT_TRUE( global_col_starts, "invalid input" ); + TNL_ASSERT_TRUE( matrix, "invalid input" ); + + // NOTE: this call creates a matrix on host even when device support is + // enabled in Hypre + hypre_ParCSRMatrix* A = hypre_CSRMatrixToParCSRMatrix( communicator, matrix, global_row_starts, global_col_starts ); + + // make sure that the first entry in each row of a square diagonal block is the diagonal element + HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false ); + diag_view.reorderDiagonalEntries(); + + // initialize auxiliary substructures + hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) ); + hypre_ParCSRMatrixSetNumNonzeros( A ); + hypre_MatvecCommPkgCreate( A ); + + return HypreParCSRMatrix( A ); + } + + /** + * \brief Constructs a \e ParCSRMatrix distributed across the processors in + * \e communicator from a \e CSRMatrix on rank 0. + * + * \param x The values of the vector are unused, but its distribution is + * used for the distribution of the matrix **columns**. + * \param b The values of the vector are unused, but its distribution is + * used for the distribution of the matrix **rows**. + */ + static HypreParCSRMatrix + fromMasterRank( hypre_CSRMatrix* matrix, hypre_ParVector* x, hypre_ParVector* b ) + { + const MPI_Comm communicator = hypre_ParVectorComm( x ); + if( communicator == MPI_COMM_NULL ) + return {}; + + // construct global partitioning info on rank 0 + // (the result on other ranks is not used) + const int nproc = MPI::GetSize( communicator ); + std::unique_ptr< IndexType[] > sendbuf{ new IndexType[ nproc ] }; + + std::unique_ptr< IndexType[] > global_row_starts{ new IndexType[ nproc + 1 ] }; + for( int i = 0; i < nproc; i++ ) + sendbuf[ i ] = hypre_ParVectorFirstIndex( b ); + MPI::Alltoall( sendbuf.get(), 1, global_row_starts.get(), 1, communicator ); + global_row_starts[ nproc ] = hypre_ParVectorGlobalSize( b ); + + // if the matrix is square, global_col_starts should be equal to global_row_starts + // (otherwise Hypre puts everything into the offd block which would violate the + // AssumedPartition premise) + if( hypre_CSRMatrixNumRows( matrix ) == hypre_CSRMatrixNumCols( matrix ) ) { + return fromMasterRank( communicator, global_row_starts.get(), global_row_starts.get(), matrix ); + } + + // NOTE: this was not tested... + std::unique_ptr< IndexType[] > global_col_starts{ new IndexType[ nproc + 1 ] }; + for( int i = 0; i < nproc; i++ ) + sendbuf[ i ] = hypre_ParVectorFirstIndex( x ); + MPI::Alltoall( sendbuf.get(), 1, global_col_starts.get(), 1, communicator ); + global_col_starts[ nproc ] = hypre_ParVectorGlobalSize( x ); + + return fromMasterRank( communicator, global_row_starts.get(), global_col_starts.get(), matrix ); + } + + /** + * \brief Constructs a \e ParCSRMatrix from local blocks distributed across + * the processors in \e communicator. + * + * \param global_num_rows Global number of rows of the distributed matrix. + * \param global_num_cols Global number of columns of the distributed matrix. + * \param local_row_range The range `[begin, end)` of rows owned by the + * calling rank. + * \param local_col_range The range `[begin, end)` of columns owned by the + * calling rank. For square matrices it should be + * equal to \e local_row_range. + * \param matrix The local matrix block owned by the calling rank. The + * number of rows must match the size of \e local_row_range and the + * column indices must span the whole `[0, global_cols)` range. + */ + static HypreParCSRMatrix + fromLocalBlocks( MPI_Comm communicator, + IndexType global_num_rows, + IndexType global_num_cols, + LocalRangeType local_row_range, + LocalRangeType local_col_range, + hypre_CSRMatrix* local_A ) + { + TNL_ASSERT_TRUE( local_A, "invalid input" ); + + IndexType row_starts[ 2 ]; + row_starts[ 0 ] = local_row_range.getBegin(); + row_starts[ 1 ] = local_row_range.getEnd(); + IndexType col_starts[ 2 ]; + col_starts[ 0 ] = local_col_range.getBegin(); + col_starts[ 1 ] = local_col_range.getEnd(); + + hypre_ParCSRMatrix* A = + hypre_ParCSRMatrixCreate( communicator, global_num_rows, global_num_cols, row_starts, col_starts, 0, 0, 0 ); + + const IndexType first_col_diag = hypre_ParCSRMatrixFirstColDiag( A ); + const IndexType last_col_diag = hypre_ParCSRMatrixLastColDiag( A ); + + // Hypre function which splits local_A into the diagonal and off-diagonal blocks + GenerateDiagAndOffd( local_A, A, first_col_diag, last_col_diag ); + + // make sure that the first entry in each row of a square diagonal block is the diagonal element + HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false ); + diag_view.reorderDiagonalEntries(); + + // initialize auxiliary substructures + hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) ); + hypre_ParCSRMatrixSetNumNonzeros( A ); + hypre_MatvecCommPkgCreate( A ); + + return HypreParCSRMatrix( A ); + } + + operator const hypre_ParCSRMatrix*() const noexcept + { + return m; + } + + operator hypre_ParCSRMatrix*() noexcept + { + return m; + } + + // HYPRE_ParCSRMatrix is "equivalent" to pointer to hypre_ParCSRMatrix, but requires + // ugly C-style cast on the pointer (which is done even in Hypre itself) + // https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/HYPRE_parcsr_matrix.c + operator HYPRE_ParCSRMatrix() const noexcept + { + return (HYPRE_ParCSRMatrix) m; + } + + ~HypreParCSRMatrix() + { + reset(); + } + + LocalRangeType + getLocalRowRange() const + { + if( m == nullptr ) + return {}; + return { hypre_ParCSRMatrixRowStarts( m )[ 0 ], hypre_ParCSRMatrixRowStarts( m )[ 1 ] }; + } + + LocalRangeType + getLocalColumnRange() const + { + if( m == nullptr ) + return {}; + return { hypre_ParCSRMatrixColStarts( m )[ 0 ], hypre_ParCSRMatrixColStarts( m )[ 1 ] }; + } + + IndexType + getRows() const + { + if( m == nullptr ) + return 0; + return hypre_ParCSRMatrixGlobalNumRows( m ); + } + + IndexType + getColumns() const + { + if( m == nullptr ) + return 0; + return hypre_ParCSRMatrixGlobalNumCols( m ); + } + + IndexType + getNonzeroElementsCount() const + { + if( m == nullptr ) + return 0; + return hypre_ParCSRMatrixNumNonzeros( m ); + } + + MPI_Comm + getCommunicator() const + { + if( m == nullptr ) + return MPI_COMM_NULL; + return hypre_ParCSRMatrixComm( m ); + } + + MatrixType + getDiagonalBlock() + { + if( m == nullptr ) + return {}; + return HypreCSRMatrix( hypre_ParCSRMatrixDiag( m ), false ); + } + + MatrixType + getOffdiagonalBlock() + { + if( m == nullptr ) + return {}; + return HypreCSRMatrix( hypre_ParCSRMatrixOffd( m ), false ); + } + + Containers::VectorView< HYPRE_Int, HYPRE_Device, HYPRE_Int > + getOffdiagonalColumnsMapping() + { + if( m == nullptr ) + return {}; + HYPRE_Int* col_map_offd = hypre_ParCSRMatrixColMapOffd( m ); + const HYPRE_Int num_cols = hypre_CSRMatrixNumCols( hypre_ParCSRMatrixOffd( m ) ); + return { col_map_offd, num_cols }; + } + + //! \brief Constructs a local matrix by merging the diagonal and off-diagonal blocks. + HypreCSRMatrix + getMergedLocalMatrix() const + { + hypre_CSRMatrix* local_A = hypre_MergeDiagAndOffd( m ); + return HypreCSRMatrix( local_A, true ); + } + + /** + * \brief Convert Hypre's format to \e HypreParCSRMatrix + * + * \param take_ownership indicates if the matrix should take ownership of + * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does + * not need it anymore. + */ + void + bind( hypre_ParCSRMatrix* handle, bool take_ownership = true ) + { + // drop/deallocate the current data + reset(); + + // take ownership of the handle + m = handle; + owns_handle = take_ownership; + } + + //! \brief Reset the matrix to empty state. + void + reset() + { + if( owns_handle && m != nullptr ) { + if( ! owns_diag ) + // hypreParCSRMatrixDestroy does not work when diag is a nullptr + hypre_ParCSRMatrixDiag( m ) = hypre_CSRMatrixCreate( 0, 0, 0 ); + if( ! owns_offd ) + // hypreParCSRMatrixDestroy does not work when offd is a nullptr + hypre_ParCSRMatrixOffd( m ) = hypre_CSRMatrixCreate( 0, 0, 0 ); + if( ! owns_col_map_offd ) + hypre_ParCSRMatrixColMapOffd( m ) = nullptr; + hypre_ParCSRMatrixDestroy( m ); + m = nullptr; + } + else + m = nullptr; + owns_handle = true; + owns_diag = true; + owns_offd = true; + owns_col_map_offd = true; + } + +protected: + hypre_ParCSRMatrix* m = nullptr; + bool owns_handle = true; + bool owns_diag = true; + bool owns_offd = true; + bool owns_col_map_offd = true; +}; + +} // namespace Matrices +} // namespace TNL + +#endif // HAVE_HYPRE diff --git a/src/TNL/Solvers/Linear/Hypre.h b/src/TNL/Solvers/Linear/Hypre.h new file mode 100644 index 0000000000000000000000000000000000000000..229da24f65e2695e51c2c4876769faf7d3992d52 --- /dev/null +++ b/src/TNL/Solvers/Linear/Hypre.h @@ -0,0 +1,771 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include <TNL/Containers/HypreParVector.h> + #include <TNL/Matrices/HypreParCSRMatrix.h> + +namespace TNL { +namespace Solvers { +namespace Linear { + +//! \addtogroup Hypre +//! @{ + +//! \brief Abstract class for Hypre's solvers and preconditioners +class HypreSolver +{ +public: + //! \brief How to treat errors returned by Hypre function calls + enum ErrorMode + { + IGNORE_HYPRE_ERRORS, ///< Ignore Hypre errors + WARN_HYPRE_ERRORS, ///< Issue warnings on Hypre errors + ABORT_HYPRE_ERRORS ///< Abort on Hypre errors (default in base class) + }; + +protected: + //! Handle for the Hypre solver + HYPRE_Solver solver = nullptr; + + //! The linear system matrix + const Matrices::HypreParCSRMatrix* A = nullptr; + + //! Indicates if Hypre's setup function was already called + mutable bool setup_called = false; + + //! How to treat Hypre errors + mutable ErrorMode error_mode = ABORT_HYPRE_ERRORS; + + //! Hook function that is called at the end of \ref solve + virtual void + postSolveHook() const + {} + +public: + HypreSolver() = default; + + explicit HypreSolver( const Matrices::HypreParCSRMatrix& A ); + + //! Type-cast to \e HYPRE_Solver + virtual operator HYPRE_Solver() const + { + return solver; + } + + //! Hypre's internal setup function + virtual HYPRE_PtrToParSolverFcn + setupFcn() const = 0; + + //! Hypre's internal solve function + virtual HYPRE_PtrToParSolverFcn + solveFcn() const = 0; + + /** + * \brief Set the matrix of the linear system to be solved. + * + * This function also resets the current state of the solver causing the + * Hypre's internal setup function to be called before the solve function. + */ + virtual void setMatrix( const Matrices::HypreParCSRMatrix& op ) + { + A = &op; + setup_called = false; + } + + //! Solve the linear system Ax=b + virtual void + solve( const Containers::HypreParVector& b, Containers::HypreParVector& x ) const; + + /** + * \brief Set the behavior for treating Hypre errors, see the \ref ErrorMode + * enum. The default mode in the base class is \ref ABORT_HYPRE_ERRORS. + */ + void + setErrorMode( ErrorMode err_mode ) const + { + error_mode = err_mode; + } + + virtual ~HypreSolver() = default; +}; + +/** + * \brief Wrapper for the PCG solver in Hypre + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HyprePCG solver; + * HYPRE_PCGSetTol(solver, 1e-7); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HyprePCG : public HypreSolver +{ +private: + HypreSolver* precond = nullptr; + +public: + explicit HyprePCG( MPI_Comm comm ); + + explicit HyprePCG( const Matrices::HypreParCSRMatrix& A ); + + ~HyprePCG() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + //! Set the Hypre solver to be used as a preconditioner + void + setPreconditioner( HypreSolver& precond ); + + void + setTol( double tol ) + { + HYPRE_PCGSetTol( solver, tol ); + } + + void + setAbsTol( double atol ) + { + HYPRE_PCGSetAbsoluteTol( solver, atol ); + } + + void + setMaxIter( int max_iter ) + { + HYPRE_PCGSetMaxIter( solver, max_iter ); + } + + void + setPrintLevel( int print_level ) + { + HYPRE_PCGSetPrintLevel( solver, print_level ); + } + + /** + * \brief Use the L2 norm of the residual for measuring PCG convergence, + * plus (optionally) 1) periodically recompute true residuals from scratch; + * and 2) enable residual-based stopping criteria. + */ + void + setResidualConvergenceOptions( int res_frequency = -1, double rtol = 0.0 ); + + HYPRE_Int + getNumIterations() const + { + HYPRE_Int num_it; + HYPRE_ParCSRPCGGetNumIterations( solver, &num_it ); + return num_it; + } + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRPCGSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRPCGSolve; + } + +protected: + void + postSolveHook() const override; +}; + +/** + * \brief Wrapper for the BiCGSTAB solver in Hypre + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreBiCGSTAB solver; + * HYPRE_BiCGSTABSetTol(solver, 1e-7); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreBiCGSTAB : public HypreSolver +{ +private: + HypreSolver* precond = nullptr; + + // Hypre does not provide a way to query this from the BiCGSTAB solver + // https://github.com/hypre-space/hypre/issues/627 + HYPRE_Int print_level = 0; + +public: + explicit HypreBiCGSTAB( MPI_Comm comm ); + + explicit HypreBiCGSTAB( const Matrices::HypreParCSRMatrix& A_ ); + + ~HypreBiCGSTAB() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + //! Set the Hypre solver to be used as a preconditioner + void + setPreconditioner( HypreSolver& precond ); + + void + setTol( double tol ) + { + HYPRE_BiCGSTABSetTol( solver, tol ); + } + + void + setAbsTol( double atol ) + { + HYPRE_BiCGSTABSetAbsoluteTol( solver, atol ); + } + + void + setMaxIter( int max_iter ) + { + HYPRE_BiCGSTABSetMaxIter( solver, max_iter ); + } + + void + setPrintLevel( int print_level ) + { + HYPRE_BiCGSTABSetPrintLevel( solver, print_level ); + this->print_level = print_level; + } + + HYPRE_Int + getNumIterations() const + { + HYPRE_Int num_it; + HYPRE_ParCSRBiCGSTABGetNumIterations( solver, &num_it ); + return num_it; + } + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRBiCGSTABSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRBiCGSTABSolve; + } + +protected: + void + postSolveHook() const override; +}; + +/** + * \brief Wrapper for the GMRES solver in Hypre + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreGMRES solver; + * HYPRE_GMRESSetTol(solver, 1e-7); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreGMRES : public HypreSolver +{ +private: + HypreSolver* precond = nullptr; + +public: + explicit HypreGMRES( MPI_Comm comm ); + + explicit HypreGMRES( const Matrices::HypreParCSRMatrix& A_ ); + + ~HypreGMRES() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + //! Set the Hypre solver to be used as a preconditioner + void + setPreconditioner( HypreSolver& precond ); + + void + setTol( double tol ) + { + HYPRE_GMRESSetTol( solver, tol ); + } + + void + setAbsTol( double tol ) + { + HYPRE_GMRESSetAbsoluteTol( solver, tol ); + } + + void + setMaxIter( int max_iter ) + { + HYPRE_GMRESSetMaxIter( solver, max_iter ); + } + + void + setKDim( int k_dim ) + { + HYPRE_GMRESSetKDim( solver, k_dim ); + } + + void + setPrintLevel( int print_level ) + { + HYPRE_GMRESSetPrintLevel( solver, print_level ); + } + + HYPRE_Int + getNumIterations() const + { + HYPRE_Int num_it; + HYPRE_ParCSRGMRESGetNumIterations( solver, &num_it ); + return num_it; + } + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRGMRESSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRGMRESSolve; + } + +protected: + void + postSolveHook() const override; +}; + +/** + * \brief Wrapper for the Flexible GMRES solver in Hypre + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreFlexGMRES solver; + * HYPRE_FlexGMRESSetTol(solver, 1e-7); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreFlexGMRES : public HypreSolver +{ +private: + HypreSolver* precond = nullptr; + +public: + explicit HypreFlexGMRES( MPI_Comm comm ); + + explicit HypreFlexGMRES( const Matrices::HypreParCSRMatrix& A_ ); + + ~HypreFlexGMRES() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + //! Set the Hypre solver to be used as a preconditioner + void + setPreconditioner( HypreSolver& precond ); + + void + setTol( double tol ) + { + HYPRE_FlexGMRESSetTol( solver, tol ); + } + + void + setMaxIter( int max_iter ) + { + HYPRE_FlexGMRESSetMaxIter( solver, max_iter ); + } + + void + setKDim( int k_dim ) + { + HYPRE_FlexGMRESSetKDim( solver, k_dim ); + } + + void + setPrintLevel( int print_level ) + { + HYPRE_FlexGMRESSetPrintLevel( solver, print_level ); + } + + HYPRE_Int + getNumIterations() const + { + HYPRE_Int num_it; + HYPRE_ParCSRFlexGMRESGetNumIterations( solver, &num_it ); + return num_it; + } + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRFlexGMRESSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRFlexGMRESSolve; + } + +protected: + void + postSolveHook() const override; +}; + +//! \brief Wrapper for the identity operator as a Hypre solver +class HypreIdentity : public HypreSolver +{ +public: + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) hypre_ParKrylovIdentitySetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) hypre_ParKrylovIdentity; + } +}; + +//! \brief Wrapper for the Jacobi preconditioner in Hypre +class HypreDiagScale : public HypreSolver +{ +public: + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRDiagScaleSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRDiagScale; + } +}; + +/** + * \brief Wrapper for Hypre's preconditioner that is intended for matrices that + * are triangular in some ordering. + * + * Finds correct ordering and performs forward substitution on processor as + * approximate inverse. Exact on one processor. + */ +class HypreTriSolve : public HypreSolver +{ +public: + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSROnProcTriSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSROnProcTriSolve; + } +}; + +/** + * \brief Wrapper for the ParaSails preconditioner in Hypre + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreParaSails precond; + * HYPRE_ParaSailsSetSym(precond, 1); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreParaSails : public HypreSolver +{ +private: + //! \brief Reset the preconditioner to the default state + void + reset( MPI_Comm comm ); + +public: + explicit HypreParaSails( MPI_Comm comm ); + + explicit HypreParaSails( const Matrices::HypreParCSRMatrix& A ); + + ~HypreParaSails() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParaSailsSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ParaSailsSolve; + } +}; + +/** + * \brief Wrapper for the Euclid preconditioner in Hypre + * + * Euclid implements the Parallel Incomplete LU factorization technique. For + * more information see: + * "A Scalable Parallel Algorithm for Incomplete Factor Preconditioning" by + * David Hysom and Alex Pothen, https://doi.org/10.1137/S1064827500376193 + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreEuclid precond; + * HYPRE_EuclidSetLevel(precond, 2); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreEuclid : public HypreSolver +{ +private: + //! \brief Reset the preconditioner to the default state + void + reset( MPI_Comm comm ); + +public: + explicit HypreEuclid( MPI_Comm comm ); + + explicit HypreEuclid( const Matrices::HypreParCSRMatrix& A ); + + ~HypreEuclid() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_EuclidSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_EuclidSolve; + } +}; + +/** + * \brief Wrapper for Hypre's native parallel ILU preconditioner. + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreILU precond; + * HYPRE_ILUSetLevelOfFill(precond, 1); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreILU : public HypreSolver +{ +private: + //! \brief Set the ILU default options + void + setDefaultOptions(); + + //! \brief Reset the preconditioner to the default state (including default options). + void + reset(); + +public: + HypreILU(); + + explicit HypreILU( const Matrices::HypreParCSRMatrix& A ); + + ~HypreILU() override; + + //! Set the print level: 0 = none, 1 = setup, 2 = solve, 3 = setup+solve + void + setPrintLevel( HYPRE_Int print_level ) + { + HYPRE_ILUSetPrintLevel( solver, print_level ); + } + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ILUSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_ILUSolve; + } +}; + +/** \brief Wrapper for the BoomerAMG solver/preconditioner in Hypre + * + * Note that the wrapper class sets its own default options that are different + * from Hypre. By default, the instance is set up for preconditioner use, i.e. + * with zero tolerance and one V cycle. At least these two parameters need to + * be changed when used as a solver. + * + * Parameters can be set using native Hypre functions, e.g. + * + * \code + * TNL::Solvers::Linear::HypreBoomerAMG solver; + * HYPRE_BoomerAMGSetTol(solver, 1e-7); + * HYPRE_BoomerAMGSetMaxIter(solver, 20); + * \endcode + * + * See the [Hypre Reference Manual][manual] for the available parameters. + * + * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/ + */ +class HypreBoomerAMG : public HypreSolver +{ +private: + //! \brief Default, generally robust, BoomerAMG options + void + setDefaultOptions(); + + //! \brief Reset the preconditioner to the default state (including default options). + void + reset(); + +public: + HypreBoomerAMG(); + + explicit HypreBoomerAMG( const Matrices::HypreParCSRMatrix& A ); + + ~HypreBoomerAMG() override; + + void + setMatrix( const Matrices::HypreParCSRMatrix& op ) override; + + //! \brief More robust options for systems of PDEs + void + setSystemsOptions( int dim, bool order_bynodes = false ); + + /** + * \brief Set parameters to use AIR AMG solver for advection-dominated problems. + * + * See "Nonsymmetric Algebraic Multigrid Based on Local Approximate Ideal + * Restriction (AIR)," Manteuffel, Ruge, Southworth, SISC (2018), + * DOI:/10.1137/17M1144350. + * + * \param restrict_type Defines which parallel restriction operator is used. + * There are the following options: + * 0: transpose of the interpolation operator + * 1: AIR-1 - Approximate Ideal Restriction (distance 1) + * 2: AIR-2 - Approximate Ideal Restriction (distance 2) + * \param relax_order Defines in which order the points are relaxed. There + * are the following options: + * 0: the points are relaxed in natural or lexicographic + * order on each processor + * 1: CF-relaxation is used + */ + void + setAdvectiveOptions( int restrict_type = 0, int relax_order = 1 ); + + //! Set the print level: 0 = none, 1 = setup, 2 = solve, 3 = setup+solve + void + setPrintLevel( int print_level ) + { + HYPRE_BoomerAMGSetPrintLevel( solver, print_level ); + } + + void + setMaxIter( int max_iter ) + { + HYPRE_BoomerAMGSetMaxIter( solver, max_iter ); + } + + void + setTol( double tol ) + { + HYPRE_BoomerAMGSetTol( solver, tol ); + } + + HYPRE_Int + getNumIterations() const + { + HYPRE_Int num_it; + HYPRE_BoomerAMGGetNumIterations( solver, &num_it ); + return num_it; + } + + HYPRE_PtrToParSolverFcn + setupFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_BoomerAMGSetup; + } + + HYPRE_PtrToParSolverFcn + solveFcn() const override + { + return (HYPRE_PtrToParSolverFcn) HYPRE_BoomerAMGSolve; + } + +protected: + void + postSolveHook() const override; +}; + +} // namespace Linear +} // namespace Solvers +} // namespace TNL + + #include "Hypre.hpp" + +// this is a Doxygen end-group marker +//! @} + +#endif // HAVE_HYPRE diff --git a/src/TNL/Solvers/Linear/Hypre.hpp b/src/TNL/Solvers/Linear/Hypre.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a394a30e945ea82497db26c98b1554ff987b5c29 --- /dev/null +++ b/src/TNL/Solvers/Linear/Hypre.hpp @@ -0,0 +1,561 @@ +// Copyright (c) 2004-2022 Tomáš Oberhuber et al. +// +// This file is part of TNL - Template Numerical Library (https://tnl-project.org/) +// +// SPDX-License-Identifier: MIT + +// Implemented by: Jakub Klinkovský + +#pragma once + +#ifdef HAVE_HYPRE + + #include "Hypre.h" + +namespace TNL { +namespace Solvers { +namespace Linear { + +HypreSolver::HypreSolver( const Matrices::HypreParCSRMatrix& A ) +{ + this->A = &A; +} + +void +HypreSolver::solve( const Containers::HypreParVector& b, Containers::HypreParVector& x ) const +{ + HYPRE_Int err_flag; + if( A == nullptr ) { + throw std::runtime_error( "HypreSolver::solve(...) : HypreParCSRMatrix A is missing" ); + } + + if( ! setup_called ) { + err_flag = setupFcn()( *this, *A, b, x ); + if( err_flag != 0 ) { + if( error_mode == WARN_HYPRE_ERRORS ) + std::cout << "Error during setup! Error code: " << err_flag << std::endl; + else if( error_mode == ABORT_HYPRE_ERRORS ) + throw std::runtime_error( "Error during setup! Error code: " + std::to_string( err_flag ) ); + } + hypre_error_flag = 0; + setup_called = true; + } + + err_flag = solveFcn()( *this, *A, b, x ); + if( err_flag != 0 ) { + if( error_mode == WARN_HYPRE_ERRORS ) + std::cout << "Error during solve! Error code: " << err_flag << std::endl; + else if( error_mode == ABORT_HYPRE_ERRORS ) + throw std::runtime_error( "Error during solve! Error code: " + std::to_string( err_flag ) ); + } + hypre_error_flag = 0; + + postSolveHook(); +} + +HyprePCG::HyprePCG( MPI_Comm comm ) +{ + HYPRE_ParCSRPCGCreate( comm, &solver ); +} + +HyprePCG::HyprePCG( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A ) +{ + const MPI_Comm comm = A.getCommunicator(); + HYPRE_ParCSRPCGCreate( comm, &solver ); +} + +HyprePCG::~HyprePCG() +{ + HYPRE_ParCSRPCGDestroy( solver ); +} + +void +HyprePCG::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + HypreSolver::setMatrix( op ); + if( precond != nullptr ) + precond->setMatrix( *A ); +} + +void +HyprePCG::setPreconditioner( HypreSolver& precond_ ) +{ + precond = &precond_; + HYPRE_ParCSRPCGSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ ); +} + +void +HyprePCG::setResidualConvergenceOptions( int res_frequency, double rtol ) +{ + HYPRE_PCGSetTwoNorm( solver, 1 ); + if( res_frequency > 0 ) + HYPRE_PCGSetRecomputeResidualP( solver, res_frequency ); + if( rtol > 0.0 ) + HYPRE_PCGSetResidualTol( solver, rtol ); +} + +void +HyprePCG::postSolveHook() const +{ + HYPRE_Int print_level; + HYPRE_PCGGetPrintLevel( solver, &print_level ); + + if( print_level > 0 ) { + HYPRE_Int num_iterations; + HYPRE_PCGGetNumIterations( solver, &num_iterations ); + + double final_res_norm; + HYPRE_PCGGetFinalRelativeResidualNorm( solver, &final_res_norm ); + + const MPI_Comm comm = A->getCommunicator(); + if( MPI::GetRank( comm ) == 0 ) { + std::cout << "PCG Iterations = " << num_iterations << std::endl + << "Final PCG Relative Residual Norm = " << final_res_norm << std::endl; + } + } +} + +HypreBiCGSTAB::HypreBiCGSTAB( MPI_Comm comm ) +{ + HYPRE_ParCSRBiCGSTABCreate( comm, &solver ); +} + +HypreBiCGSTAB::HypreBiCGSTAB( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ ) +{ + const MPI_Comm comm = A->getCommunicator(); + HYPRE_ParCSRBiCGSTABCreate( comm, &solver ); +} + +HypreBiCGSTAB::~HypreBiCGSTAB() +{ + HYPRE_ParCSRBiCGSTABDestroy( solver ); +} + +void +HypreBiCGSTAB::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + HypreSolver::setMatrix( op ); + if( precond != nullptr ) + precond->setMatrix( *A ); +} + +void +HypreBiCGSTAB::setPreconditioner( HypreSolver& precond_ ) +{ + precond = &precond_; + + HYPRE_ParCSRBiCGSTABSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ ); +} + +void +HypreBiCGSTAB::postSolveHook() const +{ + if( print_level > 0 ) { + HYPRE_Int num_iterations; + HYPRE_BiCGSTABGetNumIterations( solver, &num_iterations ); + + double final_res_norm; + HYPRE_BiCGSTABGetFinalRelativeResidualNorm( solver, &final_res_norm ); + + const MPI_Comm comm = A->getCommunicator(); + if( MPI::GetRank( comm ) == 0 ) { + std::cout << "BiCGSTAB Iterations = " << num_iterations << std::endl + << "Final BiCGSTAB Relative Residual Norm = " << final_res_norm << std::endl; + } + } +} + +HypreGMRES::HypreGMRES( MPI_Comm comm ) +{ + HYPRE_ParCSRGMRESCreate( comm, &solver ); +} + +HypreGMRES::HypreGMRES( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ ) +{ + const MPI_Comm comm = A->getCommunicator(); + HYPRE_ParCSRGMRESCreate( comm, &solver ); +} + +HypreGMRES::~HypreGMRES() +{ + HYPRE_ParCSRGMRESDestroy( solver ); +} + +void +HypreGMRES::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + HypreSolver::setMatrix( op ); + if( precond != nullptr ) + precond->setMatrix( *A ); +} + +void +HypreGMRES::setPreconditioner( HypreSolver& precond_ ) +{ + precond = &precond_; + HYPRE_ParCSRGMRESSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ ); +} + +void +HypreGMRES::postSolveHook() const +{ + HYPRE_Int print_level; + HYPRE_GMRESGetPrintLevel( solver, &print_level ); + + if( print_level > 0 ) { + HYPRE_Int num_iterations; + HYPRE_GMRESGetNumIterations( solver, &num_iterations ); + + double final_res_norm; + HYPRE_GMRESGetFinalRelativeResidualNorm( solver, &final_res_norm ); + + const MPI_Comm comm = A->getCommunicator(); + if( MPI::GetRank( comm ) == 0 ) { + std::cout << "GMRES Iterations = " << num_iterations << std::endl + << "Final GMRES Relative Residual Norm = " << final_res_norm << std::endl; + } + } +} + +HypreFlexGMRES::HypreFlexGMRES( MPI_Comm comm ) +{ + HYPRE_ParCSRFlexGMRESCreate( comm, &solver ); +} + +HypreFlexGMRES::HypreFlexGMRES( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ ) +{ + const MPI_Comm comm = A->getCommunicator(); + HYPRE_ParCSRFlexGMRESCreate( comm, &solver ); +} + +HypreFlexGMRES::~HypreFlexGMRES() +{ + HYPRE_ParCSRFlexGMRESDestroy( solver ); +} + +void +HypreFlexGMRES::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + HypreSolver::setMatrix( op ); + if( precond != nullptr ) + precond->setMatrix( *A ); +} + +void +HypreFlexGMRES::setPreconditioner( HypreSolver& precond_ ) +{ + precond = &precond_; + HYPRE_ParCSRFlexGMRESSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ ); +} + +void +HypreFlexGMRES::postSolveHook() const +{ + HYPRE_Int print_level; + HYPRE_FlexGMRESGetPrintLevel( solver, &print_level ); + + if( print_level > 0 ) { + HYPRE_Int num_iterations; + HYPRE_FlexGMRESGetNumIterations( solver, &num_iterations ); + + double final_res_norm; + HYPRE_FlexGMRESGetFinalRelativeResidualNorm( solver, &final_res_norm ); + + const MPI_Comm comm = A->getCommunicator(); + if( MPI::GetRank( comm ) == 0 ) { + std::cout << "FlexGMRES Iterations = " << num_iterations << std::endl + << "Final FlexGMRES Relative Residual Norm = " << final_res_norm << std::endl; + } + } +} + +HypreParaSails::HypreParaSails( MPI_Comm comm ) +{ + reset( comm ); +} + +HypreParaSails::HypreParaSails( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A ) +{ + reset( A.getCommunicator() ); +} + +HypreParaSails::~HypreParaSails() +{ + HYPRE_ParaSailsDestroy( solver ); +} + +void +HypreParaSails::reset( MPI_Comm comm ) +{ + if( solver != nullptr ) + HYPRE_ParaSailsDestroy( solver ); + HYPRE_ParaSailsCreate( comm, &solver ); +} + +void +HypreParaSails::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + if( A != nullptr ) + reset( A->getCommunicator() ); + HypreSolver::setMatrix( op ); +} + +HypreEuclid::HypreEuclid( MPI_Comm comm ) +{ + HYPRE_EuclidCreate( comm, &solver ); +} + +HypreEuclid::HypreEuclid( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A ) +{ + HYPRE_EuclidCreate( A.getCommunicator(), &solver ); +} + +HypreEuclid::~HypreEuclid() +{ + HYPRE_EuclidDestroy( solver ); +} + +void +HypreEuclid::reset( MPI_Comm comm ) +{ + if( solver != nullptr ) + HYPRE_EuclidDestroy( solver ); + HYPRE_EuclidCreate( comm, &solver ); +} + +void +HypreEuclid::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + if( A != nullptr ) + reset( A->getCommunicator() ); + HypreSolver::setMatrix( op ); +} + +HypreILU::HypreILU() +{ + reset(); +} + +HypreILU::HypreILU( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A ) +{ + reset(); +} + +HypreILU::~HypreILU() +{ + HYPRE_ILUDestroy( solver ); +} + +void +HypreILU::setDefaultOptions() +{ + // Maximum iterations; 1 iter for preconditioning + HYPRE_ILUSetMaxIter( solver, 1 ); + + // The tolerance when used as a smoother; set to 0.0 for preconditioner + HYPRE_ILUSetTol( solver, 0.0 ); + + // The type of incomplete LU used locally and globally (see class doc) + HYPRE_ILUSetType( solver, 0 ); // 0: ILU(k) locally and block Jacobi globally + + // Fill level 'k' for ILU(k) + HYPRE_ILUSetLevelOfFill( solver, 0 ); + + // Local reordering scheme; 0 = no reordering, 1 = reverse Cuthill-McKee + HYPRE_ILUSetLocalReordering( solver, 1 ); +} + +void +HypreILU::reset() +{ + if( solver != nullptr ) + HYPRE_ILUDestroy( solver ); + HYPRE_ILUCreate( &solver ); + setDefaultOptions(); +} + +void +HypreILU::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + reset(); + HypreSolver::setMatrix( op ); +} + +HypreBoomerAMG::HypreBoomerAMG() +{ + reset(); +} + +HypreBoomerAMG::HypreBoomerAMG( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A ) +{ + reset(); +} + +HypreBoomerAMG::~HypreBoomerAMG() +{ + HYPRE_BoomerAMGDestroy( solver ); +} + +void +HypreBoomerAMG::setDefaultOptions() +{ + #if ! defined( HYPRE_USING_GPU ) + // AMG coarsening options: + const int coarsen_type = 10; // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP + const int agg_levels = 1; // number of aggressive coarsening levels + const double theta = 0.25; // strength threshold: 0.25, 0.5, 0.8 + + // AMG interpolation options: + const int interp_type = 6; // 6 = extended+i, 0 = classical + const int Pmax = 4; // max number of elements per row in P + + // AMG relaxation options: + const int relax_type = 8; // 8 = l1-GS, 6 = symm. GS, 3 = GS, 18 = l1-Jacobi + const int relax_sweeps = 1; // relaxation sweeps on each level + + // Additional options: + const int print_level = 0; // 0 = none, 1 = setup, 2 = solve, 3 = setup+solve + const int max_levels = 25; // max number of levels in AMG hierarchy + #else + // AMG coarsening options: + const int coarsen_type = 8; // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP + const int agg_levels = 0; // number of aggressive coarsening levels + const double theta = 0.25; // strength threshold: 0.25, 0.5, 0.8 + + // AMG interpolation options: + const int interp_type = 6; // 6 = extended+i, or 18 = extended+e + const int Pmax = 4; // max number of elements per row in P + + // AMG relaxation options: + const int relax_type = 18; // 18 = l1-Jacobi, or 16 = Chebyshev + const int relax_sweeps = 1; // relaxation sweeps on each level + + // Additional options: + const int print_level = 0; // 0 = none, 1 = setup, 2 = solve, 3 = setup+solve + const int max_levels = 25; // max number of levels in AMG hierarchy + #endif + + HYPRE_BoomerAMGSetCoarsenType( solver, coarsen_type ); + HYPRE_BoomerAMGSetAggNumLevels( solver, agg_levels ); + HYPRE_BoomerAMGSetRelaxType( solver, relax_type ); + HYPRE_BoomerAMGSetNumSweeps( solver, relax_sweeps ); + HYPRE_BoomerAMGSetStrongThreshold( solver, theta ); + HYPRE_BoomerAMGSetInterpType( solver, interp_type ); + HYPRE_BoomerAMGSetPMaxElmts( solver, Pmax ); + HYPRE_BoomerAMGSetPrintLevel( solver, print_level ); + HYPRE_BoomerAMGSetMaxLevels( solver, max_levels ); + + // Use as a preconditioner (one V-cycle, zero tolerance) + HYPRE_BoomerAMGSetMaxIter( solver, 1 ); + HYPRE_BoomerAMGSetTol( solver, 0.0 ); +} + +void +HypreBoomerAMG::reset() +{ + if( solver != nullptr ) + HYPRE_BoomerAMGDestroy( solver ); + HYPRE_BoomerAMGCreate( &solver ); + setDefaultOptions(); +} + +void +HypreBoomerAMG::setMatrix( const Matrices::HypreParCSRMatrix& op ) +{ + reset(); + HypreSolver::setMatrix( op ); +} + +void +HypreBoomerAMG::setSystemsOptions( int dim, bool order_bynodes ) +{ + HYPRE_BoomerAMGSetNumFunctions( solver, dim ); + + // The default "system" ordering in Hypre is byVDIM. When using byNODES + // ordering, we have to specify the ordering explicitly. + if( order_bynodes ) { + TNL_ASSERT_TRUE( A->getRows() % dim == 0, "Ordering does not work as claimed!" ); + const HYPRE_Int nnodes = A->getRows() / dim; + + // generate DofFunc mapping on the host + HYPRE_Int* h_mapping = hypre_CTAlloc( HYPRE_Int, A->getRows(), HYPRE_MEMORY_HOST ); + HYPRE_Int k = 0; + for( int i = 0; i < dim; i++ ) + for( HYPRE_Int j = 0; j < nnodes; j++ ) + h_mapping[ k++ ] = i; + + #if defined( HYPRE_USING_GPU ) + // the mapping is assumed to be a device pointer + HYPRE_Int* mapping = hypre_CTAlloc( HYPRE_Int, A->getRows(), HYPRE_MEMORY_DEVICE ); + hypre_TMemcpy( mapping, h_mapping, HYPRE_Int, A->getRows(), HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST ); + hypre_TFree( h_mapping, HYPRE_MEMORY_HOST ); + #else + HYPRE_Int* mapping = h_mapping; + #endif + + // Hypre takes ownership and frees the pointer in HYPRE_BoomerAMGDestroy + HYPRE_BoomerAMGSetDofFunc( solver, mapping ); + } + + // more robust options with respect to convergence + HYPRE_BoomerAMGSetAggNumLevels( solver, 0 ); + HYPRE_BoomerAMGSetStrongThreshold( solver, 0.5 ); +} + +void +HypreBoomerAMG::setAdvectiveOptions( int restrict_type, int relax_order ) +{ + HYPRE_BoomerAMGSetRestriction( solver, restrict_type ); + HYPRE_BoomerAMGSetRelaxOrder( solver, relax_order ); + + const int interp_type = 100; + const int relax_type = 8; + const int coarsen_type = 6; + const double strength_tolC = 0.1; + const double strength_tolR = 0.01; + const double filter_tolR = 0.0; + const double filterA_tol = 0.0; + + HYPRE_BoomerAMGSetInterpType( solver, interp_type ); + HYPRE_BoomerAMGSetRelaxType( solver, relax_type ); + HYPRE_BoomerAMGSetCoarsenType( solver, coarsen_type ); + HYPRE_BoomerAMGSetStrongThreshold( solver, strength_tolC ); + HYPRE_BoomerAMGSetStrongThresholdR( solver, strength_tolR ); + HYPRE_BoomerAMGSetFilterThresholdR( solver, filter_tolR ); + HYPRE_BoomerAMGSetADropTol( solver, filterA_tol ); + // type = -1: drop based on row inf-norm + HYPRE_BoomerAMGSetADropType( solver, -1 ); + + // disable aggressive coarsening + HYPRE_BoomerAMGSetAggNumLevels( solver, 0 ); + + // set number of sweeps (up and down cycles, and the coarsest level) + HYPRE_BoomerAMGSetNumSweeps( solver, 1 ); +} + +void +HypreBoomerAMG::postSolveHook() const +{ + HYPRE_Int print_level; + HYPRE_BoomerAMGGetPrintLevel( solver, &print_level ); + + if( print_level > 1 ) { + HYPRE_Int num_iterations; + HYPRE_BoomerAMGGetNumIterations( solver, &num_iterations ); + + double final_res_norm; + HYPRE_BoomerAMGGetFinalRelativeResidualNorm( solver, &final_res_norm ); + + const MPI_Comm comm = A->getCommunicator(); + if( MPI::GetRank( comm ) == 0 ) { + std::cout << "BoomerAMG Iterations = " << num_iterations << std::endl + << "Final BoomerAMG Relative Residual Norm = " << final_res_norm << std::endl; + } + } +} + +} // namespace Linear +} // namespace Solvers +} // namespace TNL + +#endif // HAVE_HYPRE diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index 0deb077d307abfa4356f1f7ec956a5aa599f3da3..84b2ceeaae2e37540e3a34618c5c5f19e07e6497 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -54,3 +54,23 @@ if( ZLIB_FOUND ) add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) endforeach() endif() + + +if( ${BUILD_MPI} ) + find_package(Hypre) + + if( ${HYPRE_FOUND} ) + foreach( target IN ITEMS HypreTest ) + add_executable( ${target} ${target}.cpp ) + target_compile_definitions( ${target} PUBLIC "-DHAVE_HYPRE" ) + target_compile_options( ${target} PRIVATE ${CXX_TESTS_FLAGS} ) + target_include_directories( ${target} PUBLIC ${HYPRE_INCLUDE_DIRS} ) + target_link_libraries( ${target} ${TESTS_LIBRARIES} ${HYPRE_LIBRARIES} ) + target_link_options( ${target} PRIVATE ${TESTS_LINKER_FLAGS} ) + + set( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX}" ) + add_test( NAME ${target} COMMAND "mpirun" ${mpi_test_parameters}) + add_test( ${target}_nodistr ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) + endforeach() + endif() +endif() diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index b6fa763a9c885a43074cbd15d73cdff76919574d..19186feb3b7f4bad60153f6943b02c09d5880a10 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -35,6 +35,12 @@ struct MyData // operator used in tests, not necessary for Array to work template< typename T > bool operator==( T v ) const { return data == v; } + + // operator used in ArrayIO::loadSubrange (due to casting requested from the tests) + operator double() const + { + return data; + } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) @@ -646,6 +652,48 @@ TYPED_TEST( ArrayTest, SaveAndLoad ) EXPECT_EQ( std::remove( TEST_FILE_NAME ), 0 ); } +TYPED_TEST( ArrayTest, SaveAndLoadSubrangeWithCast ) +{ + using ArrayType = typename TestFixture::ArrayType; + using Value = typename ArrayType::ValueType; + using Index = typename ArrayType::IndexType; + using namespace TNL::Containers::detail; + + ArrayType v; + v.setSize( 100 ); + for( int i = 0; i < 100; i ++ ) + v.setElement( i, i ); + ASSERT_NO_THROW( File( TEST_FILE_NAME, std::ios_base::out ) << v ); + + const int offset = 25; + const int subrangeSize = 50; + using CastValue = short int; + Array< CastValue, typename ArrayType::DeviceType, long > array; + array.setSize( subrangeSize ); + File file( TEST_FILE_NAME, std::ios_base::in ); + { + // read type + const std::string type = getObjectType( file ); + ASSERT_EQ( type, ArrayType::getSerializationType() ); + // read size + std::size_t elementsInFile; + file.load( &elementsInFile ); + EXPECT_EQ( elementsInFile, static_cast< std::size_t >( v.getSize() ) ); + // read data, cast from Value to short int + using IO = ArrayIO< CastValue, Index, typename Allocators::Default< typename ArrayType::DeviceType >::template Allocator< CastValue > >; + // hack for the test... + if( getType< Value >() == "MyData" ) + IO::loadSubrange( file, elementsInFile, offset, array.getData(), array.getSize(), "double" ); + else + IO::loadSubrange( file, elementsInFile, offset, array.getData(), array.getSize(), getType< Value >() ); + } + for( Index i = 0; i < subrangeSize; i++ ) { + EXPECT_EQ( array.getElement( i ), offset + i ); + } + + EXPECT_EQ( std::remove( TEST_FILE_NAME ), 0 ); +} + TYPED_TEST( ArrayTest, LoadViaView ) { using ArrayType = typename TestFixture::ArrayType; diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h index 7af5fb286ee1f831b2db5c98d0acd119c5ec60ab..dee033c5e1549b42922407ef181f13781dc9e735 100644 --- a/src/UnitTests/FileTest.h +++ b/src/UnitTests/FileTest.h @@ -18,11 +18,13 @@ TEST( FileTest, WriteAndRead ) File file; ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out ) ); - int intData( 5 ); + int intData = 5; double doubleData[ 3 ] = { 1.0, 2.0, 3.0 }; + const int ignoredValue = 42; const double constDoubleData = 3.14; ASSERT_NO_THROW( file.save( &intData ) ); ASSERT_NO_THROW( file.save( doubleData, 3 ) ); + ASSERT_NO_THROW( file.save( &ignoredValue ) ); ASSERT_NO_THROW( file.save( &constDoubleData ) ); ASSERT_NO_THROW( file.close() ); @@ -32,6 +34,7 @@ TEST( FileTest, WriteAndRead ) double newConstDoubleData; ASSERT_NO_THROW( file.load( &newIntData, 1 ) ); ASSERT_NO_THROW( file.load( newDoubleData, 3 ) ); + ASSERT_NO_THROW( file.ignore< decltype(ignoredValue) >( 1 ) ); ASSERT_NO_THROW( file.load( &newConstDoubleData, 1 ) ); EXPECT_EQ( newIntData, intData ); diff --git a/src/UnitTests/HypreTest.cpp b/src/UnitTests/HypreTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..45d1c5d2d8d5e884d62020a04f844bf92d2d572a --- /dev/null +++ b/src/UnitTests/HypreTest.cpp @@ -0,0 +1,490 @@ +#ifdef HAVE_GTEST + +#include <gtest/gtest.h> +#include "Containers/VectorHelperFunctions.h" + +#ifdef HAVE_HYPRE + +#include <TNL/Containers/HypreVector.h> +#include <TNL/Containers/HypreParVector.h> +#include <TNL/Matrices/HypreCSRMatrix.h> +#include <TNL/Matrices/HypreParCSRMatrix.h> +#include <TNL/Solvers/Linear/Hypre.h> + +#include <TNL/Containers/Partitioner.h> +#include <TNL/Matrices/SparseMatrix.h> + +using namespace TNL; +using namespace TNL::Solvers::Linear; + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, Vector ) +{ + TNL::Hypre hypre; + + constexpr int size = 10; + + using Vector = Containers::Vector< double, HYPRE_Device >; + Vector v( size, 2 ); + + Containers::HypreVector a; + a.setSize( size ); + EXPECT_TRUE( a.getData() ); + EXPECT_EQ( a.getSize(), size ); + a.setValue( 2 ); + EXPECT_EQ( a.getConstView(), v ); + EXPECT_EQ( a.getView(), v ); + + Containers::HypreVector b; + b.bind( a ); + EXPECT_EQ( b.getData(), a.getData() ); + EXPECT_EQ( b.getSize(), a.getSize() ); + + a.bind( v ); + EXPECT_EQ( a.getData(), v.getData() ); + EXPECT_EQ( a.getSize(), v.getSize() ); + + // test move-constructor and move-assignment + Containers::HypreVector c = std::move( b ); + b = std::move( c ); +} + +TEST( HypreTest, AssumedPartitionCheck ) +{ + // this should always return 1: + // https://github.com/hypre-space/hypre/blob/master/src/utilities/ap.c + ASSERT_EQ( HYPRE_AssumedPartitionCheck(), 1 ); +} + +template< typename DistributedArray > +auto getDistributedArray( MPI_Comm communicator, + typename DistributedArray::IndexType globalSize, + typename DistributedArray::IndexType ghosts ) +{ + DistributedArray array; + + using LocalRangeType = typename DistributedArray::LocalRangeType; + const LocalRangeType localRange = Containers::Partitioner< typename DistributedArray::IndexType >::splitRange( globalSize, communicator ); + array.setDistribution( localRange, ghosts, globalSize, communicator ); + + using Synchronizer = typename Containers::Partitioner< typename DistributedArray::IndexType >::template ArraySynchronizer< typename DistributedArray::DeviceType >; + array.setSynchronizer( std::make_shared<Synchronizer>( localRange, ghosts / 2, communicator ) ); + + return array; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, ParVector ) +{ + TNL::Hypre hypre; + + const MPI_Comm communicator = MPI_COMM_WORLD; + const int globalSize = 97; // prime number to force non-uniform distribution + // some arbitrary even value (but must be 0 if not distributed) + const int ghosts = (TNL::MPI::GetSize(communicator) > 1) ? 4 : 0; + + using DistributedVector = Containers::DistributedVector< HYPRE_Real, HYPRE_Device, HYPRE_BigInt >; + auto v = getDistributedArray< DistributedVector >( communicator, globalSize, ghosts ); + v.setValue( 2 ); + + auto a = getDistributedArray< Containers::HypreParVector >( communicator, globalSize, ghosts ); + EXPECT_EQ( a.getSize(), v.getSize() ); + EXPECT_EQ( a.getLocalRange(), v.getLocalRange() ); + a.setValue( 2 ); + EXPECT_EQ( a.getConstView(), v ); + EXPECT_EQ( a.getView(), v ); + + Containers::HypreParVector b; + b.bind( v ); + EXPECT_EQ( b.getSize(), v.getSize() ); + EXPECT_EQ( b.getLocalView().getData(), v.getLocalView().getData() ); + EXPECT_EQ( b.getLocalView(), v.getLocalView() ); + + // test move-constructor and move-assignment + Containers::HypreParVector c = std::move( b ); + b = std::move( c ); +} + +/*** + * Creates the following matrix (dots represent zero matrix elements): + * + * / 2.5 -1 . . . \ + * | -1 2.5 -1 . . | + * | . -1 2.5 -1. . | + * | . . -1 2.5 -1 | + * \ . . . -1 2.5 / + */ +template< typename MatrixType > +MatrixType getGlobalMatrix( int size ) +{ + using Vector = Containers::Vector< typename MatrixType::RealType, typename MatrixType::DeviceType, typename MatrixType::IndexType >; + MatrixType matrix; + matrix.setDimensions( size, size ); + Vector capacities( size, 3 ); + capacities.setElement( 0, 2 ); + capacities.setElement( size - 1, 2 ); + matrix.setRowCapacities( capacities ); + + auto f = [=] __cuda_callable__ ( typename MatrixType::ViewType::RowView& row ) mutable { + const int rowIdx = row.getRowIndex(); + if( rowIdx == 0 ) + { + row.setElement( 0, rowIdx, 2.5 ); // diagonal element + row.setElement( 1, rowIdx+1, -1 ); // element above the diagonal + } + else if( rowIdx == size - 1 ) + { + row.setElement( 0, rowIdx-1, -1.0 ); // element below the diagonal + row.setElement( 1, rowIdx, 2.5 ); // diagonal element + } + else + { + row.setElement( 0, rowIdx-1, -1.0 ); // element below the diagonal + row.setElement( 1, rowIdx, 2.5 ); // diagonal element + row.setElement( 2, rowIdx+1, -1.0 ); // element above the diagonal + } + }; + matrix.getView().forAllRows( f ); + + return matrix; +} + +// returns only a local block of the global matrix created by getGlobalMatrix +template< typename MatrixType > +MatrixType getLocalBlock( typename MatrixType::IndexType global_size, + Containers::Subrange< typename MatrixType::IndexType > local_row_range ) +{ + using Vector = Containers::Vector< typename MatrixType::RealType, typename MatrixType::DeviceType, typename MatrixType::IndexType >; + MatrixType matrix; + matrix.setDimensions( local_row_range.getSize(), global_size ); + Vector capacities( local_row_range.getSize(), 3 ); + if( local_row_range.getBegin() == 0 ) + capacities.setElement( 0, 2 ); + if( local_row_range.getEnd() == global_size ) + capacities.setElement( local_row_range.getSize() - 1, 2 ); + matrix.setRowCapacities( capacities ); + + auto f = [=] __cuda_callable__ ( typename MatrixType::ViewType::RowView& row ) mutable { + const int rowIdx = row.getRowIndex(); + const int colIdx = local_row_range.getBegin() + rowIdx; + if( colIdx == 0 ) + { + row.setElement( 0, colIdx, 2.5 ); // diagonal element + row.setElement( 1, colIdx+1, -1 ); // element above the diagonal + } + else if( colIdx == global_size - 1 ) + { + row.setElement( 0, colIdx-1, -1.0 ); // element below the diagonal + row.setElement( 1, colIdx, 2.5 ); // diagonal element + } + else + { + row.setElement( 0, colIdx-1, -1.0 ); // element below the diagonal + row.setElement( 1, colIdx, 2.5 ); // diagonal element + row.setElement( 2, colIdx+1, -1.0 ); // element above the diagonal + } + }; + matrix.getView().forAllRows( f ); + + return matrix; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, CSRMatrix ) +{ + TNL::Hypre hypre; + + constexpr int size = 97; + + using MatrixType = Matrices::SparseMatrix< double, HYPRE_Device >; + MatrixType matrix = getGlobalMatrix< MatrixType >( size ); + + Matrices::HypreCSRMatrix a = getGlobalMatrix< Matrices::HypreCSRMatrix >( size ); + EXPECT_GT( a.getValues().getSize(), 0 ); + EXPECT_GT( a.getColumnIndexes().getSize(), 0 ); + EXPECT_GT( a.getRowOffsets().getSize(), 0 ); + EXPECT_EQ( a.getRows(), size ); + EXPECT_EQ( a.getColumns(), size ); + EXPECT_EQ( a.getConstView(), matrix ); + EXPECT_EQ( a.getView(), matrix ); + + Matrices::HypreCSRMatrix b; + b.bind( matrix ); + EXPECT_EQ( b.getRows(), size ); + EXPECT_EQ( b.getColumns(), size ); + EXPECT_EQ( b.getConstView(), matrix ); + EXPECT_EQ( b.getView(), matrix ); + + // test move-constructor and move-assignment + Matrices::HypreCSRMatrix c = std::move( b ); + b = std::move( c ); +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, ParCSRMatrix_wrapCSRMatrix ) +{ + TNL::Hypre hypre; + + constexpr int size = 97; + Matrices::HypreCSRMatrix matrix = getGlobalMatrix< Matrices::HypreCSRMatrix >( size ); + + auto a = Matrices::HypreParCSRMatrix::wrapCSRMatrix( matrix ); + EXPECT_EQ( a.getRows(), size ); + EXPECT_EQ( a.getColumns(), size ); + EXPECT_EQ( a.getLocalRowRange(), Containers::Subrange< HYPRE_BigInt >( 0, size ) ); + EXPECT_EQ( a.getLocalColumnRange(), Containers::Subrange< HYPRE_BigInt >( 0, size ) ); + EXPECT_EQ( a.getNonzeroElementsCount(), a.getDiagonalBlock().getNonzeroElementsCount() ); + + Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock(); + // the matrices are not elementwise-equal, ParCSR is reordered such that the + // diagonal elements are first in the rows +// EXPECT_EQ( a_diag.getValues(), matrix.getValues() ); +// EXPECT_EQ( a_diag.getColumnIndexes(), matrix.getColumnIndexes() ); +// EXPECT_EQ( a_diag.getRowOffsets(), matrix.getRowOffsets() ); + EXPECT_EQ( a_diag.getValues().getSize(), matrix.getValues().getSize() ); + EXPECT_EQ( a_diag.getColumnIndexes().getSize(), matrix.getColumnIndexes().getSize() ); + EXPECT_EQ( a_diag.getRowOffsets().getSize(), matrix.getRowOffsets().getSize() ); + EXPECT_EQ( a_diag.getRows(), size ); + EXPECT_EQ( a_diag.getColumns(), size ); +// EXPECT_EQ( a_diag.getConstView(), matrix ); +// EXPECT_EQ( a_diag.getView(), matrix ); + + Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock(); + EXPECT_EQ( a_offd.getValues().getSize(), 0 ); + EXPECT_EQ( a_offd.getColumnIndexes().getSize(), 0 ); + EXPECT_EQ( a_offd.getRowOffsets().getSize(), 0 ); + EXPECT_EQ( a_offd.getRows(), size ); + EXPECT_EQ( a_offd.getColumns(), 0 ); + + // test move-constructor and move-assignment + Matrices::HypreParCSRMatrix c = std::move( a ); + a = std::move( c ); +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, ParCSRMatrix_fromMasterRank ) +{ + TNL::Hypre hypre; + + const int rank = TNL::MPI::GetRank( MPI_COMM_WORLD ); + const int nproc = TNL::MPI::GetSize( MPI_COMM_WORLD ); + + constexpr int global_size = 97; + Matrices::HypreCSRMatrix matrix = getGlobalMatrix< Matrices::HypreCSRMatrix >( global_size ); + + using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >; + auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + Containers::HypreParVector hypre_x; + hypre_x.bind( x ); + + auto a = Matrices::HypreParCSRMatrix::fromMasterRank( matrix, hypre_x, hypre_x ); + EXPECT_EQ( a.getRows(), global_size ); + EXPECT_EQ( a.getColumns(), global_size ); + EXPECT_EQ( a.getLocalRowRange(), x.getLocalRange() ); + EXPECT_EQ( a.getLocalColumnRange(), x.getLocalRange() ); + + Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock(); + const int local_size = x.getLocalRange().getSize(); + EXPECT_EQ( a_diag.getRowOffsets().getSize(), local_size + 1 ); + EXPECT_EQ( a_diag.getRows(), local_size ); + EXPECT_EQ( a_diag.getColumns(), local_size ); + + Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock(); + EXPECT_EQ( a_offd.getRowOffsets().getSize(), local_size + 1 ); + EXPECT_EQ( a_offd.getRows(), local_size ); + const int offd_cols = ( nproc == 1 ) ? 0 : ( rank > 0 && rank < nproc - 1 ) ? 2 : 1; + EXPECT_EQ( a_offd.getColumns(), offd_cols ); + + auto col_map_offd = a.getOffdiagonalColumnsMapping(); + EXPECT_EQ( col_map_offd.getSize(), offd_cols ); + + // test move-constructor and move-assignment + Matrices::HypreParCSRMatrix c = std::move( a ); + a = std::move( c ); +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEST( HypreTest, ParCSRMatrix_fromLocalBlocks ) +{ + TNL::Hypre hypre; + + const int rank = TNL::MPI::GetRank( MPI_COMM_WORLD ); + const int nproc = TNL::MPI::GetSize( MPI_COMM_WORLD ); + + constexpr int global_size = 97; + + using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >; + auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + + Matrices::HypreCSRMatrix matrix = getLocalBlock< Matrices::HypreCSRMatrix >( global_size, x.getLocalRange() ); + + auto a = Matrices::HypreParCSRMatrix::fromLocalBlocks( MPI_COMM_WORLD, global_size, global_size, x.getLocalRange(), x.getLocalRange(), matrix ); + EXPECT_EQ( a.getRows(), global_size ); + EXPECT_EQ( a.getColumns(), global_size ); + EXPECT_EQ( a.getLocalRowRange(), x.getLocalRange() ); + EXPECT_EQ( a.getLocalColumnRange(), x.getLocalRange() ); + + Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock(); + const int local_size = x.getLocalRange().getSize(); + EXPECT_EQ( a_diag.getRowOffsets().getSize(), local_size + 1 ); + EXPECT_EQ( a_diag.getRows(), local_size ); + EXPECT_EQ( a_diag.getColumns(), local_size ); + + Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock(); + EXPECT_EQ( a_offd.getRowOffsets().getSize(), local_size + 1 ); + EXPECT_EQ( a_offd.getRows(), local_size ); + const int offd_cols = ( nproc == 1 ) ? 0 : ( rank > 0 && rank < nproc - 1 ) ? 2 : 1; + EXPECT_EQ( a_offd.getColumns(), offd_cols ); + + auto col_map_offd = a.getOffdiagonalColumnsMapping(); + EXPECT_EQ( col_map_offd.getSize(), offd_cols ); + + Matrices::HypreCSRMatrix a_local = a.getMergedLocalMatrix(); + EXPECT_EQ( a_local.getRows(), matrix.getRows() ); + EXPECT_EQ( a_local.getColumns(), matrix.getColumns() ); + EXPECT_EQ( a_local.getNonzeroElementsCount(), matrix.getNonzeroElementsCount() ); + // TODO: the merged local matrix still has the diagonal element as the first entry per row + // and we can't use hypre_CSRMatrixReorder on the original block, because it is not square +// hypre_CSRMatrixReorder( matrix ); +// EXPECT_EQ( a_local.getView(), matrix.getView() ); + + // test move-constructor and move-assignment + Matrices::HypreParCSRMatrix c = std::move( a ); + a = std::move( c ); +} + + +void solve( Matrices::HypreParCSRMatrix& A, + Containers::HypreParVector& x, + Containers::HypreParVector& b ) +{ + // create the preconditioner + HypreDiagScale precond; +// HypreParaSails precond( A.getCommunicator() ); +// HypreEuclid precond( A.getCommunicator() ); +// HypreILU precond; +// HypreBoomerAMG precond; + + // initialize the Hypre solver + HyprePCG solver( A.getCommunicator() ); +// solver.setPrintLevel( 1 ); + solver.setPreconditioner( precond ); + solver.setMatrix( A ); + solver.setTol( 1e-9 ); + solver.setResidualConvergenceOptions( -1, 1e-9 ); + + // solve the linear system + solver.solve( b, x ); +} + +TEST( HypreTest, solve_seq ) +{ + TNL::Hypre hypre; + + constexpr int size = 97; + + // create the global matrix + using MatrixType = Matrices::HypreCSRMatrix; + auto global_matrix = getGlobalMatrix< MatrixType >( size ); + + // set the dofs and right-hand-side vectors + using Vector = Containers::Vector< double, HYPRE_Device >; + Vector x( size, 1.0 ); + Vector b( size ); + global_matrix.getView().vectorProduct( x, b ); + x = 0.0; + + // bind parallel Hypre vectors + Containers::HypreParVector hypre_x; + Containers::HypreParVector hypre_b; + hypre_x.bind( {0, size}, 0, size, MPI_COMM_SELF, x.getView() ); + hypre_b.bind( {0, size}, 0, size, MPI_COMM_SELF, b.getView() ); + + // convert the matrix to HypreParCSR + HYPRE_BigInt row_starts[ 2 ]; + row_starts[ 0 ] = 0; + row_starts[ 1 ] = size; + auto matrix = Matrices::HypreParCSRMatrix::fromMasterRank( MPI_COMM_SELF, row_starts, row_starts, global_matrix ); + + // solve the linear system + solve( matrix, hypre_x, hypre_b ); + + // verify the solution + expect_near( x, 1, 1e-8 ); +} + +TEST( HypreTest, solve_distributed_fromMasterRank ) +{ + TNL::Hypre hypre; + + constexpr int global_size = 97; + + // create the dofs and right-hand-side vectors + using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >; + auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + auto b = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + + // bind parallel Hypre vectors + Containers::HypreParVector hypre_x; + Containers::HypreParVector hypre_b; + hypre_x.bind( x ); + hypre_b.bind( b ); + + // create the global matrix + using MatrixType = Matrices::HypreCSRMatrix; + auto global_matrix = getGlobalMatrix< MatrixType >( global_size ); + + // distribute the matrix to Hypre format (rank 0 -> all ranks) + auto matrix = Matrices::HypreParCSRMatrix::fromMasterRank( global_matrix, hypre_x, hypre_b ); + + // set the right-hand-side + x.setValue( 1 ); + HYPRE_ParCSRMatrixMatvec( 1.0, matrix, hypre_x, 0.0, hypre_b ); + x.setValue( 0 ); + + // solve the linear system + solve( matrix, hypre_x, hypre_b ); + + // verify the solution + expect_near( x.getLocalView(), 1, 1e-8 ); +} + +TEST( HypreTest, solve_distributed_fromLocalBlock ) +{ + TNL::Hypre hypre; + + constexpr int global_size = 97; + + // create the dofs and right-hand-side vectors + using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >; + auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + auto b = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 ); + + // bind parallel Hypre vectors + Containers::HypreParVector hypre_x; + Containers::HypreParVector hypre_b; + hypre_x.bind( x ); + hypre_b.bind( b ); + + // create the local matrix block + Matrices::HypreCSRMatrix local_matrix = getLocalBlock< Matrices::HypreCSRMatrix >( global_size, x.getLocalRange() ); + + // create the distributed matrix + auto matrix = Matrices::HypreParCSRMatrix::fromLocalBlocks( MPI_COMM_WORLD, global_size, global_size, x.getLocalRange(), x.getLocalRange(), local_matrix ); + + // set the right-hand-side + x.setValue( 1 ); + HYPRE_ParCSRMatrixMatvec( 1.0, matrix, hypre_x, 0.0, hypre_b ); + x.setValue( 0 ); + + // solve the linear system + solve( matrix, hypre_x, hypre_b ); + + // verify the solution + expect_near( x.getLocalView(), 1, 1e-8 ); +} + +#endif +#endif + +#include "main_mpi.h"