diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt index 1e8a400bcbbc68dd3aec2b06bd4592372c05e13c..11ce01f0ed78ef144af39241607677d91c6063dc 100644 --- a/src/Examples/CMakeLists.txt +++ b/src/Examples/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory( Hypre ) + #add_subdirectory( simple-examples ) #add_subdirectory( Hamilton-Jacobi ) #add_subdirectory( heat-equation ) diff --git a/src/Examples/Hypre/CMakeLists.txt b/src/Examples/Hypre/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c9eb9184bf5cb46574592bea7d8e3357f64f8b5 --- /dev/null +++ b/src/Examples/Hypre/CMakeLists.txt @@ -0,0 +1,13 @@ +if( ${BUILD_MPI} ) + find_package(Hypre) + if( ${HYPRE_FOUND} ) + foreach( source IN ITEMS hypre-ex5.c tnl-hypre-ex5.cpp ) + string( REGEX REPLACE "\.cpp|\.c" "" target ${source} ) + add_executable( ${target} ${source} ) + target_compile_definitions( ${target} PUBLIC "-DHAVE_HYPRE" ) + target_include_directories( ${target} PUBLIC ${HYPRE_INCLUDE_DIRS} ) + target_link_libraries( ${target} ${HYPRE_LIBRARIES} -lm ) + install( TARGETS ${target} RUNTIME DESTINATION bin ) + endforeach() + endif() +endif() diff --git a/src/Examples/Hypre/ex.h b/src/Examples/Hypre/ex.h new file mode 100644 index 0000000000000000000000000000000000000000..8b2324c1700b1b8a23d9e34cba84c1808dbf13e1 --- /dev/null +++ b/src/Examples/Hypre/ex.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/*-------------------------------------------------------------------------- + * Header file for examples + *--------------------------------------------------------------------------*/ + +#ifndef HYPRE_EXAMPLES_INCLUDES +#define HYPRE_EXAMPLES_INCLUDES + +#include <HYPRE_config.h> + +#if defined(HYPRE_EXAMPLE_USING_CUDA) + +#include <cuda_runtime.h> + +#ifndef HYPRE_USING_UNIFIED_MEMORY +#error *** Running the examples on GPUs requires Unified Memory. Please reconfigure and rebuild with --enable-unified-memory *** +#endif + +static inline void* +gpu_malloc(size_t size) +{ + void *ptr = NULL; + cudaMallocManaged(&ptr, size, cudaMemAttachGlobal); + return ptr; +} + +static inline void* +gpu_calloc(size_t num, size_t size) +{ + void *ptr = NULL; + cudaMallocManaged(&ptr, num * size, cudaMemAttachGlobal); + cudaMemset(ptr, 0, num * size); + return ptr; +} + +#define malloc(size) gpu_malloc(size) +#define calloc(num, size) gpu_calloc(num, size) +#define free(ptr) ( cudaFree(ptr), ptr = NULL ) +#endif /* #if defined(HYPRE_EXAMPLE_USING_CUDA) */ +#endif /* #ifndef HYPRE_EXAMPLES_INCLUDES */ + diff --git a/src/Examples/Hypre/glvis-ex5.sh b/src/Examples/Hypre/glvis-ex5.sh new file mode 100755 index 0000000000000000000000000000000000000000..403dcf447772edf7ed6ddb484c7abafa3307dcfa --- /dev/null +++ b/src/Examples/Hypre/glvis-ex5.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +ex=ex5 +keys=Aaamc + +dir="${1:-.}" +mesh="$dir/$ex.mesh" +sol="$dir/$ex.sol" + +if ! test -e "$mesh"; then + echo "Error: cannot find mesh file for $ex" + exit 1 +fi + +echo "FiniteElementSpace" > "$sol" +echo "FiniteElementCollection: H1_2D_P1" >> "$sol" +echo "VDim: 1" >> "$sol" +echo "Ordering: 0" >> "$sol" +echo "" >> "$sol" +find "$dir" -name "$ex.sol.??????" | sort | xargs cat >> "$sol" + +glvis -m "$mesh" -g "$sol" -k "$keys" diff --git a/src/Examples/Hypre/hypre-ex5.c b/src/Examples/Hypre/hypre-ex5.c new file mode 100644 index 0000000000000000000000000000000000000000..79c3128afa363e8474fa8c7d9b99df6c931f110e --- /dev/null +++ b/src/Examples/Hypre/hypre-ex5.c @@ -0,0 +1,674 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* + Example 5 + + Interface: Linear-Algebraic (IJ) + + Compile with: use the install script from TNL + + Sample run: OMP_NUM_THREADS=1 mpirun -np 4 hypre-ex5 + + Description: + + This example solves the 2-D Laplacian problem with zero boundary conditions + on an n x n grid. The number of unknowns is N=n^2. The standard 5-point + stencil is used, and we solve for the interior nodes only. + + The example is based on Hypre's Example 5 and its modification "ex5big" that + illustrates the 64-bit integer support in Hypre needed to run problems with + more than 2B unknowns. We recommend comparing this example with both + examples in Hypre: + + - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5.c + - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5big.c + + To enable the 64-bit integer support, you need to build Hypre with the + --enable-bigint option of the configure script. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include <HYPRE_krylov.h> +#include <HYPRE.h> +#include <HYPRE_parcsr_ls.h> + +#include "ex.h" +#include "vis.c" + +int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations, + double rel_residual_norm); + +#define my_min(a,b) (((a)<(b)) ? (a) : (b)) + +int main (int argc, char *argv[]) +{ + HYPRE_Int i; + int myid, num_procs; + int N, n; + + HYPRE_Int ilower, iupper; + HYPRE_Int local_size, extra; + + int solver_id; + int vis, print_system; + + double h, h2; + + HYPRE_IJMatrix A; + HYPRE_ParCSRMatrix parcsr_A; + HYPRE_IJVector b; + HYPRE_ParVector par_b; + HYPRE_IJVector x; + HYPRE_ParVector par_x; + + HYPRE_Solver solver, precond; + + /* Initialize MPI */ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + + /* Initialize HYPRE */ + HYPRE_Init(); + + /* Print GPU info */ + /* HYPRE_PrintDeviceInfo(); */ +#if defined(HYPRE_USING_GPU) + /* use cuSPARSE for SpGEMM */ + HYPRE_SetSpGemmUseCusparse(0); +#endif + + /* Default problem parameters */ + n = 33; + solver_id = 0; + vis = 0; + print_system = 0; + + + /* Parse command line */ + { + int arg_index = 0; + int print_usage = 0; + + while (arg_index < argc) + { + if ( strcmp(argv[arg_index], "-n") == 0 ) + { + arg_index++; + n = atoi(argv[arg_index++]); + } + else if ( strcmp(argv[arg_index], "-solver") == 0 ) + { + arg_index++; + solver_id = atoi(argv[arg_index++]); + } + else if ( strcmp(argv[arg_index], "-vis") == 0 ) + { + arg_index++; + vis = 1; + } + else if ( strcmp(argv[arg_index], "-print_system") == 0 ) + { + arg_index++; + print_system = 1; + } + else if ( strcmp(argv[arg_index], "-help") == 0 ) + { + print_usage = 1; + break; + } + else + { + arg_index++; + } + } + + if ((print_usage) && (myid == 0)) + { + printf("\n"); + printf("Usage: %s [<options>]\n", argv[0]); + printf("\n"); + printf(" -n <n> : problem size in each direction (default: 33)\n"); + printf(" -solver <ID> : solver ID\n"); + printf(" 0 - AMG (default) \n"); + printf(" 1 - AMG-PCG\n"); + printf(" 8 - ParaSails-PCG\n"); + printf(" 50 - PCG\n"); + printf(" 61 - AMG-FlexGMRES\n"); + printf(" -vis : save the solution for GLVis visualization\n"); + printf(" -print_system : print the matrix and rhs\n"); + printf("\n"); + } + + if (print_usage) + { + HYPRE_Finalize(); + MPI_Finalize(); + return (0); + } + } + + /* Preliminaries: want at least one processor per row */ + if (n * n < num_procs) { n = sqrt(num_procs) + 1; } + N = n * n; /* global number of rows */ + h = 1.0 / (n + 1); /* mesh size*/ + h2 = h * h; + + /* Each processor knows only of its own rows - the range is denoted by ilower + and upper. Here we partition the rows. We account for the fact that + N may not divide evenly by the number of processors. */ + local_size = N / num_procs; + extra = N - local_size * num_procs; + + ilower = local_size * myid; + ilower += my_min(myid, extra); + + iupper = local_size * (myid + 1); + iupper += my_min(myid + 1, extra); + iupper = iupper - 1; + + /* How many rows do I have? */ + local_size = iupper - ilower + 1; + + /* Create the matrix. + Note that this is a square matrix, so we indicate the row partition + size twice (since number of rows = number of cols) */ + HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); + + /* Choose a parallel csr format storage (see the User's Manual) */ + HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); + + /* Initialize before setting coefficients */ + HYPRE_IJMatrixInitialize(A); + + /* Now go through my local rows and set the matrix entries. + Each row has at most 5 entries. For example, if n=3: + + A = [M -I 0; -I M -I; 0 -I M] + M = [4 -1 0; -1 4 -1; 0 -1 4] + + Note that here we are setting one row at a time, though + one could set all the rows together (see the User's Manual). + */ + { + int nnz; + /* OK to use constant-length arrays for CPUs + double values[5]; + HYPRE_Int cols[5]; + */ + double *values = (double *) malloc(5 * sizeof(double)); + HYPRE_Int *cols = (HYPRE_Int *) malloc(5 * sizeof(HYPRE_Int)); + HYPRE_Int *tmp = (HYPRE_Int *) malloc(2 * sizeof(HYPRE_Int)); + + for (i = ilower; i <= iupper; i++) + { + nnz = 0; + + /* The left identity block:position i-n */ + if ((i - n) >= 0) + { + cols[nnz] = i - n; + values[nnz] = -1.0; + nnz++; + } + + /* The left -1: position i-1 */ + if (i % n) + { + cols[nnz] = i - 1; + values[nnz] = -1.0; + nnz++; + } + + /* Set the diagonal: position i */ + cols[nnz] = i; + values[nnz] = 4.0; + nnz++; + + /* The right -1: position i+1 */ + if ((i + 1) % n) + { + cols[nnz] = i + 1; + values[nnz] = -1.0; + nnz++; + } + + /* The right identity block:position i+n */ + if ((i + n) < N) + { + cols[nnz] = i + n; + values[nnz] = -1.0; + nnz++; + } + + /* Set the values for row i */ + tmp[0] = nnz; + tmp[1] = i; + HYPRE_IJMatrixSetValues(A, 1, &tmp[0], &tmp[1], cols, values); + } + + free(values); + free(cols); + free(tmp); + } + + /* Assemble after setting the coefficients */ + HYPRE_IJMatrixAssemble(A); + + /* Note: for the testing of small problems, one may wish to read + in a matrix in IJ format (for the format, see the output files + from the -print_system option). + In this case, one would use the following routine: + HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD, + HYPRE_PARCSR, &A ); + <filename> = IJ.A.out to read in what has been printed out + by -print_system (processor numbers are omitted). + A call to HYPRE_IJMatrixRead is an *alternative* to the + following sequence of HYPRE_IJMatrix calls: + Create, SetObjectType, Initialize, SetValues, and Assemble + */ + + + /* Get the parcsr matrix object to use */ + HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); + + + /* Create the rhs and solution */ + HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &b); + HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); + HYPRE_IJVectorInitialize(b); + + HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &x); + HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); + HYPRE_IJVectorInitialize(x); + + /* Set the rhs values to h^2 and the solution to zero */ + { + double *rhs_values, *x_values; + HYPRE_Int *rows; + + rhs_values = (double*) calloc(local_size, sizeof(double)); + x_values = (double*) calloc(local_size, sizeof(double)); + rows = (HYPRE_Int*) calloc(local_size, sizeof(HYPRE_Int)); + + for (i = 0; i < local_size; i++) + { + rhs_values[i] = h2; + x_values[i] = 0.0; + rows[i] = ilower + i; + } + + HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); + HYPRE_IJVectorSetValues(x, local_size, rows, x_values); + + free(x_values); + free(rhs_values); + free(rows); + } + + + HYPRE_IJVectorAssemble(b); + /* As with the matrix, for testing purposes, one may wish to read in a rhs: + HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD, + HYPRE_PARCSR, &b ); + as an alternative to the + following sequence of HYPRE_IJVectors calls: + Create, SetObjectType, Initialize, SetValues, and Assemble + */ + HYPRE_IJVectorGetObject(b, (void **) &par_b); + + HYPRE_IJVectorAssemble(x); + HYPRE_IJVectorGetObject(x, (void **) &par_x); + + + /* Print out the system - files names will be IJ.out.A.XXXXX + and IJ.out.b.XXXXX, where XXXXX = processor id */ + if (print_system) + { + HYPRE_IJMatrixPrint(A, "IJ.out.A"); + HYPRE_IJVectorPrint(b, "IJ.out.b"); + } + + + /* Choose a solver and solve the system */ + + /* AMG */ + if (solver_id == 0) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_BoomerAMGCreate(&solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ + HYPRE_BoomerAMGSetOldDefault(solver); /* Falgout coarsening with modified classical interpolation */ + HYPRE_BoomerAMGSetRelaxType(solver, 6); /* Sym. G-S/Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(solver, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ + HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ + + /* Now setup and solve! */ + HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); + HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver */ + HYPRE_BoomerAMGDestroy(solver); + } + /* PCG */ + else if (solver_id == 50) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver */ + HYPRE_ParCSRPCGDestroy(solver); + } + /* PCG with AMG preconditioner */ + else if (solver_id == 1) + { + HYPRE_Int num_iterations; + double final_res_norm; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now set up the AMG preconditioner and specify any parameters */ + HYPRE_BoomerAMGCreate(&precond); + HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ + HYPRE_BoomerAMGSetCoarsenType(precond, 6); + HYPRE_BoomerAMGSetOldDefault(precond); + HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(precond, 1); + HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ + HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ + + /* Set the PCG preconditioner */ + HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, + (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destroy solver and preconditioner */ + HYPRE_ParCSRPCGDestroy(solver); + HYPRE_BoomerAMGDestroy(precond); + } + /* PCG with Parasails Preconditioner */ + else if (solver_id == 8) + { + HYPRE_Int num_iterations; + double final_res_norm; + + int sai_max_levels = 1; + double sai_threshold = 0.1; + double sai_filter = 0.05; + int sai_sym = 1; + + /* Create solver */ + HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ + HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ + + /* Now set up the ParaSails preconditioner and specify any parameters */ + HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); + HYPRE_ParaSailsSetFilter(precond, sai_filter); + HYPRE_ParaSailsSetSym(precond, sai_sym); + HYPRE_ParaSailsSetLogging(precond, 3); + + /* Set the PCG preconditioner */ + HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, + (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); + + /* Now setup and solve! */ + HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); + + + /* Run info - needed logging turned on */ + HYPRE_PCGGetNumIterations(solver, &num_iterations); + HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destory solver and preconditioner */ + HYPRE_ParCSRPCGDestroy(solver); + HYPRE_ParaSailsDestroy(precond); + } + /* Flexible GMRES with AMG Preconditioner */ + else if (solver_id == 61) + { + HYPRE_Int num_iterations; + double final_res_norm; + int restart = 30; + int modify = 1; + + + /* Create solver */ + HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver); + + /* Set some parameters (See Reference Manual for more parameters) */ + HYPRE_FlexGMRESSetKDim(solver, restart); + HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */ + HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */ + HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */ + HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */ + + + /* Now set up the AMG preconditioner and specify any parameters */ + HYPRE_BoomerAMGCreate(&precond); + HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ + HYPRE_BoomerAMGSetCoarsenType(precond, 6); + HYPRE_BoomerAMGSetOldDefault(precond); + HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */ + HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */ + HYPRE_BoomerAMGSetNumSweeps(precond, 1); + HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ + HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ + + /* Set the FlexGMRES preconditioner */ + HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, + (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); + + + if (modify) + { + /* this is an optional call - if you don't call it, hypre_FlexGMRESModifyPCDefault + is used - which does nothing. Otherwise, you can define your own, similar to + the one used here */ + HYPRE_FlexGMRESSetModifyPC( + solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample); + } + + + /* Now setup and solve! */ + HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x); + HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x); + + /* Run info - needed logging turned on */ + HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations); + HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm); + if (myid == 0) + { + printf("\n"); + printf("Iterations = %lld\n", (long long int) num_iterations); + printf("Final Relative Residual Norm = %e\n", final_res_norm); + printf("\n"); + } + + /* Destory solver and preconditioner */ + HYPRE_ParCSRFlexGMRESDestroy(solver); + HYPRE_BoomerAMGDestroy(precond); + + } + else + { + if (myid == 0) { printf("Invalid solver id specified.\n"); } + } + + /* Save the solution for GLVis visualization, see vis/glvis-ex5.sh */ + if (vis) + { + FILE *file; + char filename[255]; + + int nvalues = local_size; + int *rows = (int*) calloc(nvalues, sizeof(int)); + double *values = (double*) calloc(nvalues, sizeof(double)); + + for (i = 0; i < nvalues; i++) + { + rows[i] = ilower + i; + } + + /* get the local solution */ + HYPRE_IJVectorGetValues(x, nvalues, rows, values); + + sprintf(filename, "%s.%06d", "vis/ex5.sol", myid); + if ((file = fopen(filename, "w")) == NULL) + { + printf("Error: can't open output file %s\n", filename); + HYPRE_Finalize(); + MPI_Finalize(); + exit(1); + } + + /* save solution */ + for (i = 0; i < nvalues; i++) + { + fprintf(file, "%.14e\n", values[i]); + } + + fflush(file); + fclose(file); + + free(rows); + free(values); + + /* save global finite element mesh */ + if (myid == 0) + { + GLVis_PrintGlobalSquareMesh("vis/ex5.mesh", n - 1); + } + } + + /* Clean up */ + HYPRE_IJMatrixDestroy(A); + HYPRE_IJVectorDestroy(b); + HYPRE_IJVectorDestroy(x); + + /* Finalize HYPRE */ + HYPRE_Finalize(); + + /* Finalize MPI*/ + MPI_Finalize(); + + return (0); +} + +/*-------------------------------------------------------------------------- + hypre_FlexGMRESModifyPCAMGExample - + + This is an example (not recommended) + of how we can modify things about AMG that + affect the solve phase based on how FlexGMRES is doing...For + another preconditioner it may make sense to modify the tolerance.. + + *--------------------------------------------------------------------------*/ + +int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations, + double rel_residual_norm) +{ + + + if (rel_residual_norm > .1) + { + HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 10); + } + else + { + HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 1); + } + + + return 0; +} diff --git a/src/Examples/Hypre/tnl-hypre-ex5.cpp b/src/Examples/Hypre/tnl-hypre-ex5.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a55476ac1c13be243dca546879634b268daa662 --- /dev/null +++ b/src/Examples/Hypre/tnl-hypre-ex5.cpp @@ -0,0 +1,405 @@ +/** + * Description: + * + * This example solves the 2-D Laplacian problem with zero boundary conditions + * on an n x n grid. The number of unknowns is N=n^2. The standard 5-point + * stencil is used, and we solve for the interior nodes only. + * + * The example is based on the "ex5.c" example. It demonstrates how the C code + * can be ported to C++ and TNL. We recommend comparing this example the + * original one. + * + * Compile with: use the install script from TNL + * + * Sample run: OMP_NUM_THREADS=1 mpirun -np 4 tnl-hypre-ex5 + */ + +#include <cstdio> +#include <string> + +#include <TNL/Math.h> +#include <TNL/MPI.h> +#include <TNL/Hypre.h> +#include <TNL/Containers/HypreParVector.h> +#include <TNL/Matrices/HypreParCSRMatrix.h> +#include <TNL/Solvers/Linear/Hypre.h> + +#include "vis.c" + +int +hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm ); + +int +main( int argc, char* argv[] ) +{ + // Initialize MPI + TNL::MPI::ScopedInitializer mpi( argc, argv ); + const int myid = TNL::MPI::GetRank( MPI_COMM_WORLD ); + const int num_procs = TNL::MPI::GetSize( MPI_COMM_WORLD ); + + // Initialize HYPRE and set some global options, notably HYPRE_SetSpGemmUseCusparse(0); + TNL::Hypre hypre; + + // Default problem parameters + int n = 33; + int solver_id = 0; + int vis = 0; + + // Parse the command line + { + int arg_index = 0; + int print_usage = 0; + + while( arg_index < argc ) { + if( std::string( argv[ arg_index ] ) == "-n" ) { + arg_index++; + n = std::stoi( argv[ arg_index++ ] ); + } + else if( std::string( argv[ arg_index ] ) == "-solver" ) { + arg_index++; + solver_id = std::stoi( argv[ arg_index++ ] ); + } + else if( std::string( argv[ arg_index ] ) == "-vis" ) { + arg_index++; + vis = 1; + } + else if( std::string( argv[ arg_index ] ) == "-help" ) { + print_usage = 1; + break; + } + else { + arg_index++; + } + } + + if( print_usage && myid == 0 ) { + std::cerr << "\n" + "Usage: " << argv[ 0 ] << " [<options>]\n" + "\n" + " -n <n> : problem size in each direction (default: 33)\n" + " -solver <ID> : solver ID\n" + " 0 - AMG (default) \n" + " 1 - AMG-PCG\n" + " 8 - ParaSails-PCG\n" + " 50 - PCG\n" + " 61 - AMG-FlexGMRES\n" + " -vis : save the solution for GLVis visualization\n" + << std::endl; + } + + if( print_usage ) + return EXIT_SUCCESS; + } + + // Set the problem sizes + // Preliminaries: we want at least one rank per row + if( n * n < num_procs ) + n = std::sqrt( num_procs ) + 1; + const int N = n * n; // global number of rows + const double h = 1.0 / ( n + 1 ); // mesh cell size + + /* Each rank knows only of its own rows - the range is denoted by ilower + and upper. Here we partition the rows. We account for the fact that + N may not divide evenly by the number of ranks. */ + HYPRE_Int local_size = N / num_procs; + HYPRE_Int extra = N - local_size * num_procs; + + HYPRE_Int ilower = local_size * myid; + ilower += TNL::min( myid, extra ); + + HYPRE_Int iupper = local_size * ( myid + 1 ); + iupper += TNL::min( myid + 1, extra ); + iupper = iupper - 1; + + // How many rows do I have? + local_size = iupper - ilower + 1; + + // Let each rank create its local matrix in the CSR format in TNL + using CSR = TNL::Matrices::SparseMatrix< double, TNL::HYPRE_Device, HYPRE_Int >; + CSR A_local; + A_local.setDimensions( local_size, N ); + + // Allocate row capacities - this must match exactly the sparsity pattern of + // the matrix + typename CSR::RowsCapacitiesType capacities; + capacities.setSize( local_size ); + auto capacities_view = capacities.getView(); + TNL::Algorithms::ParallelFor< TNL::HYPRE_Device >::exec( ilower, iupper + 1, + [=] __cuda_callable__ ( HYPRE_Int i ) mutable + { + int nnz = 0; + + // The left identity block: position i-n + if( i - n >= 0 ) + nnz++; + + // The left -1: position i-1 + if( i % n ) + nnz++; + + // The diagonal: position i + nnz++; + + // The right -1: position i+1 + if( ( i + 1 ) % n ) + nnz++; + + // The right identity block: position i+n + if( i + n < N ) + nnz++; + + // The row index must be converted from global to local + capacities_view[ i - ilower ] = nnz; + } ); + A_local.setRowCapacities( capacities ); + + /* Now assemble the local matrix. Each row has at most 5 entries. For + * example, if n=3: + * A = [M -I 0; -I M -I; 0 -I M] + * M = [4 -1 0; -1 4 -1; 0 -1 4] + */ + A_local.forAllRows( [=] __cuda_callable__ ( typename CSR::RowView& row ) mutable { + // The row index must be converted from local to global + const HYPRE_Int i = ilower + row.getRowIndex(); + int nnz = 0; + + // The left identity block: position i-n + if( i - n >= 0 ) + row.setElement( nnz++, i - n, -1.0 ); + + // The left -1: position i-1 + if( i % n ) + row.setElement( nnz++, i - 1, -1.0 ); + + // The diagonal: position i + row.setElement( nnz++, i, 4.0 ); + + // The right -1: position i+1 + if( ( i + 1 ) % n ) + row.setElement( nnz++, i + 1, -1.0 ); + + // The right identity block: position i+n + if( i + n < N ) + row.setElement( nnz++, i + n, -1.0 ); + } ); + + // Bind the TNL matrix to HypreCSR + TNL::Matrices::HypreCSRMatrix A_local_hypre; + A_local_hypre.bind( A_local ); + + // Assemble the distributed matrix in Hypre from the local blocks + // Note that this is a square matrix, so we indicate the row partition + // size twice (since number of rows = number of cols) + using HypreParCSR = TNL::Matrices::HypreParCSRMatrix; + HypreParCSR parcsr_A = HypreParCSR::fromLocalBlocks( MPI_COMM_WORLD, N, N, {ilower, iupper + 1}, {ilower, iupper + 1}, A_local_hypre ); + + // Deallocate the local matrix since it is not needed anymore in this example + A_local_hypre.reset(); + A_local.reset(); + + + // Create the rhs and solution vectors + TNL::Containers::HypreParVector par_b; + TNL::Containers::HypreParVector par_x; + + par_b.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD ); + par_x.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD ); + + // Set the rhs values to h^2 and the solution to zero + par_b.setValue( h * h ); + par_x.setValue( 0.0 ); + + + // Choose a solver and solve the system + + // AMG + if( solver_id == 0 ) { + // Create the solver + TNL::Solvers::Linear::HypreBoomerAMG solver; + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + // NOTE: The wrapper class sets its own default options that are + // different from Hypre. The overriding settings below result in + // the same state as the hypre-ex5.c example. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( solver, 3 ); // Print solve info + parameters + HYPRE_BoomerAMGSetOldDefault( solver ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( solver, 6 ); // Sym. G-S/Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( solver, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetNumSweeps( solver, 1 ); // Sweeeps on each level + HYPRE_BoomerAMGSetTol( solver, 1e-7 ); // Convergence tolerance + HYPRE_BoomerAMGSetMaxIter( solver, 20 ); // Use as solver: max iterations + HYPRE_BoomerAMGSetAggNumLevels( solver, 0 ); // number of aggressive coarsening levels + + // Solve the linear system (calls AMG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG + else if( solver_id == 50 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Set the matrix of the linear system + solver.setMatrix( parcsr_A ); + + // Ignore errors returned from Hypre functions (e.g. when PCG does not + // converge within the maximum iterations limit). + solver.setErrorMode( solver.WARN_HYPRE_ERRORS ); + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG with AMG preconditioner + else if( solver_id == 1 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreBoomerAMG precond; + + // Set the PCG preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( precond, 1 ); // Print setup info + parameters + HYPRE_BoomerAMGSetOldDefault( precond ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( precond, 6 ); // Sym G.S./Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( precond, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetAggNumLevels( precond, 0 ); // number of aggressive coarsening levels + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // PCG with ParaSails preconditioner + else if( solver_id == 8 ) { + // Create the solver + TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreParaSails precond( MPI_COMM_WORLD ); + + // Set the PCG preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_PCGSetMaxIter( solver, 1000 ); // max iterations + HYPRE_PCGSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_PCGSetTwoNorm( solver, 1 ); // use the two norm as the stopping criteria + HYPRE_PCGSetPrintLevel( solver, 2 ); // prints out the iteration info + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_ParaSailsSetParams( precond, 0.1, 1 ); // threshold and max levels + HYPRE_ParaSailsSetFilter( precond, 0.05 ); + HYPRE_ParaSailsSetSym( precond, 1 ); + + // Solve the linear system (calls PCG setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + // Flexible GMRES with AMG preconditioner + else if( solver_id == 61 ) { + // Create the solver + TNL::Solvers::Linear::HypreFlexGMRES solver( MPI_COMM_WORLD ); + + // Create the preconditioner + TNL::Solvers::Linear::HypreBoomerAMG precond; + + // Set the FlexGMRES preconditioner + solver.setPreconditioner( precond ); + + // Set the matrix of the linear system + // WARNING: setMatrix resets the preconditioner, including setting + // default options. + solver.setMatrix( parcsr_A ); + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_FlexGMRESSetKDim( solver, 30 ); // restart parameter + HYPRE_FlexGMRESSetMaxIter( solver, 1000 ); // max iterations + HYPRE_FlexGMRESSetTol( solver, 1e-7 ); // convergence tolerance + HYPRE_FlexGMRESSetPrintLevel( solver, 2 ); // print solve info + + // Set some parameters (See Reference Manual for more parameters) + HYPRE_BoomerAMGSetPrintLevel( precond, 1 ); // Print setup info + parameters + HYPRE_BoomerAMGSetOldDefault( precond ); // Falgout coarsening with modified classical interpolation + HYPRE_BoomerAMGSetRelaxType( precond, 6 ); // Sym G.S./Jacobi hybrid relaxation + HYPRE_BoomerAMGSetRelaxOrder( precond, 1 ); // Uses C/F relaxation + HYPRE_BoomerAMGSetAggNumLevels( precond, 0 ); // number of aggressive coarsening levels + + // This is an optional call - if you don't call it, + // hypre_FlexGMRESModifyPCDefault is used - which does nothing. + // Otherwise, you can define your own, similar to the one used here + HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample ); + + // Solve the linear system (calls FlexGMRES setup, solve, and prints final residual norm) + solver.solve( par_b, par_x ); + } + else if( myid == 0 ) { + std::cerr << "Invalid solver id specified." << std::endl; + return EXIT_FAILURE; + } + + // Save the solution for GLVis visualization, see glvis-ex5.sh + if( vis ) { + char filename[ 255 ]; + sprintf( filename, "%s.%06d", "vis_tnl/ex5.sol", myid ); + FILE* file = fopen( filename, "w" ); + if( file == nullptr ) { + printf( "Error: can't open output file %s\n", filename ); + return EXIT_FAILURE; + } + + // Save the solution + const auto local_x = par_x.getConstLocalView(); + for( HYPRE_Int i = 0; i < local_size; i++ ) + fprintf( file, "%.14e\n", local_x[ i ] ); + + fflush( file ); + fclose( file ); + + // Save the global finite element mesh + if( myid == 0 ) + GLVis_PrintGlobalSquareMesh( "vis_tnl/ex5.mesh", n - 1 ); + } + + return EXIT_SUCCESS; +} + +/** + * This is an example (not recommended) of how we can modify things about AMG + * that affect the solve phase based on how FlexGMRES is doing... For another + * preconditioner it may make sense to modify the tolerance. + */ +int +hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm ) +{ + if( rel_residual_norm > .1 ) + HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 10 ); + else + HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 1 ); + return 0; +} diff --git a/src/Examples/Hypre/verify-ex5.sh b/src/Examples/Hypre/verify-ex5.sh new file mode 100755 index 0000000000000000000000000000000000000000..6ca17d194d01a4859547298e58ba622c9256db6d --- /dev/null +++ b/src/Examples/Hypre/verify-ex5.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +set -e + +# make output directories +mkdir -p vis vis_tnl + +# dbg suffix for binaries +#dbg="" +dbg="-dbg" + +# problem size: N=n^2 +n=100 + +# disable OpenMP for reproducibility +export OMP_NUM_THREADS=1 + +make_glvis() { + local dir="$1" + local sol="$dir/ex5.sol" + + echo "FiniteElementSpace" > "$sol" + echo "FiniteElementCollection: H1_2D_P1" >> "$sol" + echo "VDim: 1" >> "$sol" + echo "Ordering: 0" >> "$sol" + echo "" >> "$sol" + find "$dir" -name "ex5.sol.??????" | sort | xargs cat >> "$sol" +} + +verify() { + make_glvis vis + make_glvis vis_tnl + diff "vis/ex5.sol" "vis_tnl/ex5.sol" + rm -f vis/ex5.sol* vis_tnl/ex5.sol* +} + +# BoomerAMG +mpirun hypre-ex5$dbg -vis -n $n -solver 0 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 0 +verify + +# PCG +mpirun hypre-ex5$dbg -vis -n $n -solver 50 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 50 +verify + +# PCG with AMG +mpirun hypre-ex5$dbg -vis -n $n -solver 1 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 1 +verify + +# PCG with ParaSails +mpirun hypre-ex5$dbg -vis -n $n -solver 8 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 8 +verify + +# FlexGMRES with AMG +mpirun hypre-ex5$dbg -vis -n $n -solver 61 +mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 61 +verify diff --git a/src/Examples/Hypre/vis.c b/src/Examples/Hypre/vis.c new file mode 100644 index 0000000000000000000000000000000000000000..b051fb90e8e6f4bf1ddd30461d1a8482f2fccdb2 --- /dev/null +++ b/src/Examples/Hypre/vis.c @@ -0,0 +1,60 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* Save a structured n x n mesh of square elements on the unit square into a + GLVis mesh file with the given name. */ +#include <math.h> +void GLVis_PrintGlobalSquareMesh(const char *meshfile, int n) +{ + FILE *file; + + int Dim = 2; + int NumOfVertices = (n + 1) * (n + 1); + int NumOfElements = n * n; + + int i, j; + double x, y; + double h = 1.0 / n; + + if ((file = fopen(meshfile, "w")) == NULL) + { + printf("Error: can't open output file %s\n", meshfile); + exit(1); + } + + /* mesh header */ + fprintf(file, "MFEM mesh v1.0\n"); + fprintf(file, "\ndimension\n"); + fprintf(file, "%d\n", Dim); + + /* mesh elements */ + fprintf(file, "\nelements\n"); + fprintf(file, "%d\n", NumOfElements); + for (j = 0; j < n; j++) + for (i = 0; i < n; i++) + fprintf(file, "1 3 %d %d %d %d\n", i + j * (n + 1), i + 1 + j * (n + 1), + i + 1 + (j + 1) * (n + 1), i + (j + 1) * (n + 1)); + + /* boundary will be generated by GLVis */ + fprintf(file, "\nboundary\n"); + fprintf(file, "0\n"); + + /* mesh vertices */ + fprintf(file, "\nvertices\n"); + fprintf(file, "%d\n", NumOfVertices); + fprintf(file, "%d\n", Dim); + for (j = 0; j < n + 1; j++) + for (i = 0; i < n + 1; i++) + { + x = i * h; + y = j * h; + fprintf(file, "%.14e %.14e\n", x, y); + } + + fflush(file); + fclose(file); +}