diff --git a/Documentation/Doxyfile b/Documentation/Doxyfile
index 58f4b4b1f5dd14df7d91fabad213d05e09fcf08e..8b3faf7032228a2d66bdb75a1784db70e0e14a07 100644
--- a/Documentation/Doxyfile
+++ b/Documentation/Doxyfile
@@ -2123,6 +2123,7 @@ INCLUDE_FILE_PATTERNS  =
 PREDEFINED = DOXYGEN_ONLY
 PREDEFINED += HAVE_MPI
 PREDEFINED += HAVE_CUDA
+PREDEFINED += HAVE_HYPRE
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
diff --git a/Documentation/Pages/main-page.md b/Documentation/Pages/main-page.md
index 8ad6e6d80bbce5da120f208e30e6d9e5a0e3a9e4..32d97d75f47c039c34356a4ae9b777df4352aa69 100644
--- a/Documentation/Pages/main-page.md
+++ b/Documentation/Pages/main-page.md
@@ -178,6 +178,11 @@ computing platform, and (optionally) some libraries.
       <td> </td>
       <td> Only used for the compilation of the `tnl-decompose-mesh` tool. </td>
   </tr>
+  <tr><td> [Hypre](https://github.com/hypre-space/hypre) </td>
+      <td> \ref Hypre "Wrappers for Hypre solvers" </td>
+      <td> `-DHAVE_HYPRE -lHYPRE` </td>
+      <td> Attention should be paid to Hypre build options, e.g. `--enable-bigint`. </td>
+  </tr>
   <tr><td> [libpng](http://www.libpng.org/pub/png/libpng.html) </td>
       <td> \ref TNL::Images "Image processing" classes </td>
       <td> `-DHAVE_PNG_H -lpng` </td>
diff --git a/cmake/FindHypre.cmake b/cmake/FindHypre.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..4b87af73902ecfb65f415ae2edc392165e5ab086
--- /dev/null
+++ b/cmake/FindHypre.cmake
@@ -0,0 +1,78 @@
+################################################################################
+#
+# \file      FindHypre.cmake
+# \copyright 2012-2015 J. Bakosi,
+#            2016-2018 Los Alamos National Security, LLC.,
+#            2019-2021 Triad National Security, LLC.
+#            All rights reserved. See the LICENSE file for details.
+# \brief     Find the Hypre library from LLNL
+#
+################################################################################
+
+# Hypre: https://github.com/LLNL/hypre
+#
+#  HYPRE_FOUND - System has Hypre
+#  HYPRE_INCLUDE_DIRS - The Hypre include directory
+#  HYPRE_LIBRARIES - The libraries needed to use Hypre
+#
+#  Set HYPRE_ROOT before calling find_package to a path to add an additional
+#  search path, e.g.,
+#
+#  Usage:
+#
+#  set(HYPRE_ROOT "/path/to/custom/hypre") # prefer over system
+#  find_package(Hypre)
+#  if(HYPRE_FOUND)
+#    target_link_libraries (TARGET ${HYPRE_LIBRARIES})
+#  endif()
+
+function(_HYPRE_GET_VERSION _OUT_ver _version_hdr)
+  file(STRINGS ${_version_hdr} _contents REGEX "#define HYPRE_RELEASE_VERSION[ \t]+")
+  if(_contents)
+    string(REGEX REPLACE "\"" "" _cont "${_contents}")
+    string(REGEX REPLACE ".*#define HYPRE_RELEASE_VERSION[ \t]+([0-9.]+).*" "\\1" ${_OUT_ver} "${_cont}")
+    if(NOT ${${_OUT_ver}} MATCHES "[0-9]+")
+        message(FATAL_ERROR "Version parsing failed for HYPRE_RELEASE_VERSION in ${_version_hdr}!")
+    endif()
+    set(${_OUT_ver} ${${_OUT_ver}} PARENT_SCOPE)
+ elseif(EXISTS ${_version_hdr})
+    message(FATAL_ERROR "No HYPRE_RELEASE_VERSION in ${_version_hdr}")
+ else()
+    message(FATAL_ERROR "Include file ${_version_hdr} does not exist")
+  endif()
+endfunction()
+
+# If already in cache, be silent
+if(HYPRE_INCLUDE_DIRS AND HYPRE_LIBRARIES)
+  set (HYPRE_FIND_QUIETLY TRUE)
+endif()
+
+if (HYPRE_ROOT)
+  set(HYPRE_SEARCH_OPTS NO_DEFAULT_PATH)
+else()
+  set(HYPRE_ROOT "/usr")
+endif()
+
+find_path(HYPRE_INCLUDE_DIR NAMES HYPRE.h
+                            PATH_SUFFIXES hypre
+                            HINTS ${HYPRE_ROOT}/include ${HYPRE_ROOT}/include/hypre
+                            ${HYPRE_SEARCH_OPTS})
+
+if(HYPRE_INCLUDE_DIR)
+  _HYPRE_GET_VERSION(HYPRE_VERSION ${HYPRE_INCLUDE_DIR}/HYPRE_config.h)
+  set(HYPRE_INCLUDE_DIRS ${HYPRE_INCLUDE_DIR})
+else()
+  set(HYPRE_VERSION 0.0.0)
+  set(HYPRE_INCLUDE_DIRS "")
+endif()
+
+find_library(HYPRE_LIBRARY NAMES HYPRE HINTS ${HYPRE_ROOT}/lib)
+
+set(HYPRE_LIBRARIES ${HYPRE_LIBRARY})
+
+# Handle the QUIETLY and REQUIRED arguments and set HYPRE_FOUND to TRUE if
+# all listed variables are TRUE.
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(Hypre REQUIRED_VARS HYPRE_LIBRARIES HYPRE_INCLUDE_DIRS VERSION_VAR HYPRE_VERSION)
+
+MARK_AS_ADVANCED(HYPRE_INCLUDE_DIRS HYPRE_LIBRARIES)
diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt
index 1e8a400bcbbc68dd3aec2b06bd4592372c05e13c..11ce01f0ed78ef144af39241607677d91c6063dc 100644
--- a/src/Examples/CMakeLists.txt
+++ b/src/Examples/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory( Hypre )
+
 #add_subdirectory( simple-examples )
 #add_subdirectory( Hamilton-Jacobi )
 #add_subdirectory( heat-equation )
diff --git a/src/Examples/Hypre/CMakeLists.txt b/src/Examples/Hypre/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5c9eb9184bf5cb46574592bea7d8e3357f64f8b5
--- /dev/null
+++ b/src/Examples/Hypre/CMakeLists.txt
@@ -0,0 +1,13 @@
+if( ${BUILD_MPI} )
+   find_package(Hypre)
+   if( ${HYPRE_FOUND} )
+      foreach( source IN ITEMS hypre-ex5.c tnl-hypre-ex5.cpp )
+         string( REGEX REPLACE "\.cpp|\.c" "" target ${source} )
+         add_executable( ${target} ${source} )
+         target_compile_definitions( ${target} PUBLIC "-DHAVE_HYPRE" )
+         target_include_directories( ${target} PUBLIC ${HYPRE_INCLUDE_DIRS} )
+         target_link_libraries( ${target} ${HYPRE_LIBRARIES} -lm )
+         install( TARGETS ${target} RUNTIME DESTINATION bin )
+      endforeach()
+   endif()
+endif()
diff --git a/src/Examples/Hypre/ex.h b/src/Examples/Hypre/ex.h
new file mode 100644
index 0000000000000000000000000000000000000000..8b2324c1700b1b8a23d9e34cba84c1808dbf13e1
--- /dev/null
+++ b/src/Examples/Hypre/ex.h
@@ -0,0 +1,47 @@
+/******************************************************************************
+ * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+ * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ ******************************************************************************/
+
+/*--------------------------------------------------------------------------
+ * Header file for examples
+ *--------------------------------------------------------------------------*/
+
+#ifndef HYPRE_EXAMPLES_INCLUDES
+#define HYPRE_EXAMPLES_INCLUDES
+
+#include <HYPRE_config.h>
+
+#if defined(HYPRE_EXAMPLE_USING_CUDA)
+
+#include <cuda_runtime.h>
+
+#ifndef HYPRE_USING_UNIFIED_MEMORY
+#error *** Running the examples on GPUs requires Unified Memory. Please reconfigure and rebuild with --enable-unified-memory ***
+#endif
+
+static inline void*
+gpu_malloc(size_t size)
+{
+   void *ptr = NULL;
+   cudaMallocManaged(&ptr, size, cudaMemAttachGlobal);
+   return ptr;
+}
+
+static inline void*
+gpu_calloc(size_t num, size_t size)
+{
+   void *ptr = NULL;
+   cudaMallocManaged(&ptr, num * size, cudaMemAttachGlobal);
+   cudaMemset(ptr, 0, num * size);
+   return ptr;
+}
+
+#define malloc(size) gpu_malloc(size)
+#define calloc(num, size) gpu_calloc(num, size)
+#define free(ptr) ( cudaFree(ptr), ptr = NULL )
+#endif /* #if defined(HYPRE_EXAMPLE_USING_CUDA) */
+#endif /* #ifndef HYPRE_EXAMPLES_INCLUDES */
+
diff --git a/src/Examples/Hypre/glvis-ex5.sh b/src/Examples/Hypre/glvis-ex5.sh
new file mode 100755
index 0000000000000000000000000000000000000000..403dcf447772edf7ed6ddb484c7abafa3307dcfa
--- /dev/null
+++ b/src/Examples/Hypre/glvis-ex5.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+ex=ex5
+keys=Aaamc
+
+dir="${1:-.}"
+mesh="$dir/$ex.mesh"
+sol="$dir/$ex.sol"
+
+if ! test -e "$mesh"; then
+    echo "Error: cannot find mesh file for $ex"
+    exit 1
+fi
+
+echo "FiniteElementSpace" > "$sol"
+echo "FiniteElementCollection: H1_2D_P1" >> "$sol"
+echo "VDim: 1" >> "$sol"
+echo "Ordering: 0" >> "$sol"
+echo "" >> "$sol"
+find "$dir" -name "$ex.sol.??????" | sort | xargs cat >> "$sol"
+
+glvis -m "$mesh" -g "$sol" -k "$keys"
diff --git a/src/Examples/Hypre/hypre-ex5.c b/src/Examples/Hypre/hypre-ex5.c
new file mode 100644
index 0000000000000000000000000000000000000000..79c3128afa363e8474fa8c7d9b99df6c931f110e
--- /dev/null
+++ b/src/Examples/Hypre/hypre-ex5.c
@@ -0,0 +1,674 @@
+/******************************************************************************
+ * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+ * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ ******************************************************************************/
+
+/*
+   Example 5
+
+   Interface:    Linear-Algebraic (IJ)
+
+   Compile with: use the install script from TNL
+
+   Sample run:   OMP_NUM_THREADS=1 mpirun -np 4 hypre-ex5
+
+   Description:
+
+   This example solves the 2-D Laplacian problem with zero boundary conditions
+   on an n x n grid.  The number of unknowns is N=n^2.  The standard 5-point
+   stencil is used, and we solve for the interior nodes only.
+
+   The example is based on Hypre's Example 5 and its modification "ex5big" that
+   illustrates the 64-bit integer support in Hypre needed to run problems with
+   more than 2B unknowns.  We recommend comparing this example with both
+   examples in Hypre:
+
+   - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5.c
+   - https://github.com/hypre-space/hypre/blob/master/src/examples/ex5big.c
+
+   To enable the 64-bit integer support, you need to build Hypre with the
+   --enable-bigint option of the configure script.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <HYPRE_krylov.h>
+#include <HYPRE.h>
+#include <HYPRE_parcsr_ls.h>
+
+#include "ex.h"
+#include "vis.c"
+
+int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations,
+                                      double rel_residual_norm);
+
+#define my_min(a,b)  (((a)<(b)) ? (a) : (b))
+
+int main (int argc, char *argv[])
+{
+   HYPRE_Int i;
+   int myid, num_procs;
+   int N, n;
+
+   HYPRE_Int ilower, iupper;
+   HYPRE_Int local_size, extra;
+
+   int solver_id;
+   int vis, print_system;
+
+   double h, h2;
+
+   HYPRE_IJMatrix A;
+   HYPRE_ParCSRMatrix parcsr_A;
+   HYPRE_IJVector b;
+   HYPRE_ParVector par_b;
+   HYPRE_IJVector x;
+   HYPRE_ParVector par_x;
+
+   HYPRE_Solver solver, precond;
+
+   /* Initialize MPI */
+   MPI_Init(&argc, &argv);
+   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
+   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+
+   /* Initialize HYPRE */
+   HYPRE_Init();
+
+   /* Print GPU info */
+   /* HYPRE_PrintDeviceInfo(); */
+#if defined(HYPRE_USING_GPU)
+   /* use cuSPARSE for SpGEMM */
+   HYPRE_SetSpGemmUseCusparse(0);
+#endif
+
+   /* Default problem parameters */
+   n = 33;
+   solver_id = 0;
+   vis = 0;
+   print_system = 0;
+
+
+   /* Parse command line */
+   {
+      int arg_index = 0;
+      int print_usage = 0;
+
+      while (arg_index < argc)
+      {
+         if ( strcmp(argv[arg_index], "-n") == 0 )
+         {
+            arg_index++;
+            n = atoi(argv[arg_index++]);
+         }
+         else if ( strcmp(argv[arg_index], "-solver") == 0 )
+         {
+            arg_index++;
+            solver_id = atoi(argv[arg_index++]);
+         }
+         else if ( strcmp(argv[arg_index], "-vis") == 0 )
+         {
+            arg_index++;
+            vis = 1;
+         }
+         else if ( strcmp(argv[arg_index], "-print_system") == 0 )
+         {
+            arg_index++;
+            print_system = 1;
+         }
+         else if ( strcmp(argv[arg_index], "-help") == 0 )
+         {
+            print_usage = 1;
+            break;
+         }
+         else
+         {
+            arg_index++;
+         }
+      }
+
+      if ((print_usage) && (myid == 0))
+      {
+         printf("\n");
+         printf("Usage: %s [<options>]\n", argv[0]);
+         printf("\n");
+         printf("  -n <n>              : problem size in each direction (default: 33)\n");
+         printf("  -solver <ID>        : solver ID\n");
+         printf("                        0  - AMG (default) \n");
+         printf("                        1  - AMG-PCG\n");
+         printf("                        8  - ParaSails-PCG\n");
+         printf("                        50 - PCG\n");
+         printf("                        61 - AMG-FlexGMRES\n");
+         printf("  -vis                : save the solution for GLVis visualization\n");
+         printf("  -print_system       : print the matrix and rhs\n");
+         printf("\n");
+      }
+
+      if (print_usage)
+      {
+         HYPRE_Finalize();
+         MPI_Finalize();
+         return (0);
+      }
+   }
+
+   /* Preliminaries: want at least one processor per row */
+   if (n * n < num_procs) { n = sqrt(num_procs) + 1; }
+   N = n * n; /* global number of rows */
+   h = 1.0 / (n + 1); /* mesh size*/
+   h2 = h * h;
+
+   /* Each processor knows only of its own rows - the range is denoted by ilower
+      and upper.  Here we partition the rows. We account for the fact that
+      N may not divide evenly by the number of processors. */
+   local_size = N / num_procs;
+   extra = N - local_size * num_procs;
+
+   ilower = local_size * myid;
+   ilower += my_min(myid, extra);
+
+   iupper = local_size * (myid + 1);
+   iupper += my_min(myid + 1, extra);
+   iupper = iupper - 1;
+
+   /* How many rows do I have? */
+   local_size = iupper - ilower + 1;
+
+   /* Create the matrix.
+      Note that this is a square matrix, so we indicate the row partition
+      size twice (since number of rows = number of cols) */
+   HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A);
+
+   /* Choose a parallel csr format storage (see the User's Manual) */
+   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);
+
+   /* Initialize before setting coefficients */
+   HYPRE_IJMatrixInitialize(A);
+
+   /* Now go through my local rows and set the matrix entries.
+      Each row has at most 5 entries. For example, if n=3:
+
+      A = [M -I 0; -I M -I; 0 -I M]
+      M = [4 -1 0; -1 4 -1; 0 -1 4]
+
+      Note that here we are setting one row at a time, though
+      one could set all the rows together (see the User's Manual).
+   */
+   {
+      int nnz;
+      /* OK to use constant-length arrays for CPUs
+      double values[5];
+      HYPRE_Int cols[5];
+      */
+      double *values = (double *) malloc(5 * sizeof(double));
+      HYPRE_Int *cols = (HYPRE_Int *) malloc(5 * sizeof(HYPRE_Int));
+      HYPRE_Int *tmp = (HYPRE_Int *) malloc(2 * sizeof(HYPRE_Int));
+
+      for (i = ilower; i <= iupper; i++)
+      {
+         nnz = 0;
+
+         /* The left identity block:position i-n */
+         if ((i - n) >= 0)
+         {
+            cols[nnz] = i - n;
+            values[nnz] = -1.0;
+            nnz++;
+         }
+
+         /* The left -1: position i-1 */
+         if (i % n)
+         {
+            cols[nnz] = i - 1;
+            values[nnz] = -1.0;
+            nnz++;
+         }
+
+         /* Set the diagonal: position i */
+         cols[nnz] = i;
+         values[nnz] = 4.0;
+         nnz++;
+
+         /* The right -1: position i+1 */
+         if ((i + 1) % n)
+         {
+            cols[nnz] = i + 1;
+            values[nnz] = -1.0;
+            nnz++;
+         }
+
+         /* The right identity block:position i+n */
+         if ((i + n) < N)
+         {
+            cols[nnz] = i + n;
+            values[nnz] = -1.0;
+            nnz++;
+         }
+
+         /* Set the values for row i */
+         tmp[0] = nnz;
+         tmp[1] = i;
+         HYPRE_IJMatrixSetValues(A, 1, &tmp[0], &tmp[1], cols, values);
+      }
+
+      free(values);
+      free(cols);
+      free(tmp);
+   }
+
+   /* Assemble after setting the coefficients */
+   HYPRE_IJMatrixAssemble(A);
+
+   /* Note: for the testing of small problems, one may wish to read
+      in a matrix in IJ format (for the format, see the output files
+      from the -print_system option).
+      In this case, one would use the following routine:
+      HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD,
+                          HYPRE_PARCSR, &A );
+      <filename>  = IJ.A.out to read in what has been printed out
+      by -print_system (processor numbers are omitted).
+      A call to HYPRE_IJMatrixRead is an *alternative* to the
+      following sequence of HYPRE_IJMatrix calls:
+      Create, SetObjectType, Initialize, SetValues, and Assemble
+   */
+
+
+   /* Get the parcsr matrix object to use */
+   HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A);
+
+
+   /* Create the rhs and solution */
+   HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &b);
+   HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR);
+   HYPRE_IJVectorInitialize(b);
+
+   HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper, &x);
+   HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR);
+   HYPRE_IJVectorInitialize(x);
+
+   /* Set the rhs values to h^2 and the solution to zero */
+   {
+      double *rhs_values, *x_values;
+      HYPRE_Int *rows;
+
+      rhs_values = (double*) calloc(local_size, sizeof(double));
+      x_values = (double*) calloc(local_size, sizeof(double));
+      rows = (HYPRE_Int*) calloc(local_size, sizeof(HYPRE_Int));
+
+      for (i = 0; i < local_size; i++)
+      {
+         rhs_values[i] = h2;
+         x_values[i] = 0.0;
+         rows[i] = ilower + i;
+      }
+
+      HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values);
+      HYPRE_IJVectorSetValues(x, local_size, rows, x_values);
+
+      free(x_values);
+      free(rhs_values);
+      free(rows);
+   }
+
+
+   HYPRE_IJVectorAssemble(b);
+   /*  As with the matrix, for testing purposes, one may wish to read in a rhs:
+       HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD,
+                                 HYPRE_PARCSR, &b );
+       as an alternative to the
+       following sequence of HYPRE_IJVectors calls:
+       Create, SetObjectType, Initialize, SetValues, and Assemble
+   */
+   HYPRE_IJVectorGetObject(b, (void **) &par_b);
+
+   HYPRE_IJVectorAssemble(x);
+   HYPRE_IJVectorGetObject(x, (void **) &par_x);
+
+
+   /*  Print out the system  - files names will be IJ.out.A.XXXXX
+        and IJ.out.b.XXXXX, where XXXXX = processor id */
+   if (print_system)
+   {
+      HYPRE_IJMatrixPrint(A, "IJ.out.A");
+      HYPRE_IJVectorPrint(b, "IJ.out.b");
+   }
+
+
+   /* Choose a solver and solve the system */
+
+   /* AMG */
+   if (solver_id == 0)
+   {
+      HYPRE_Int num_iterations;
+      double final_res_norm;
+
+      /* Create solver */
+      HYPRE_BoomerAMGCreate(&solver);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_BoomerAMGSetPrintLevel(solver, 3);  /* print solve info + parameters */
+      HYPRE_BoomerAMGSetOldDefault(solver); /* Falgout coarsening with modified classical interpolation */
+      HYPRE_BoomerAMGSetRelaxType(solver, 6);   /* Sym. G-S/Jacobi hybrid relaxation */
+      HYPRE_BoomerAMGSetRelaxOrder(solver, 1);  /* Uses C/F relaxation */
+      HYPRE_BoomerAMGSetNumSweeps(solver, 1);   /* Sweeeps on each level */
+      HYPRE_BoomerAMGSetTol(solver, 1e-7);      /* conv. tolerance */
+
+      /* Now setup and solve! */
+      HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x);
+      HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x);
+
+      /* Run info - needed logging turned on */
+      HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations);
+      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
+      if (myid == 0)
+      {
+         printf("\n");
+         printf("Iterations = %lld\n", (long long int) num_iterations);
+         printf("Final Relative Residual Norm = %e\n", final_res_norm);
+         printf("\n");
+      }
+
+      /* Destroy solver */
+      HYPRE_BoomerAMGDestroy(solver);
+   }
+   /* PCG */
+   else if (solver_id == 50)
+   {
+      HYPRE_Int num_iterations;
+      double final_res_norm;
+
+      /* Create solver */
+      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
+      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
+      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
+      HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */
+      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
+
+      /* Now setup and solve! */
+      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
+      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
+
+      /* Run info - needed logging turned on */
+      HYPRE_PCGGetNumIterations(solver, &num_iterations);
+      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
+      if (myid == 0)
+      {
+         printf("\n");
+         printf("Iterations = %lld\n", (long long int) num_iterations);
+         printf("Final Relative Residual Norm = %e\n", final_res_norm);
+         printf("\n");
+      }
+
+      /* Destroy solver */
+      HYPRE_ParCSRPCGDestroy(solver);
+   }
+   /* PCG with AMG preconditioner */
+   else if (solver_id == 1)
+   {
+      HYPRE_Int num_iterations;
+      double final_res_norm;
+
+      /* Create solver */
+      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
+      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
+      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
+      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
+      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
+
+      /* Now set up the AMG preconditioner and specify any parameters */
+      HYPRE_BoomerAMGCreate(&precond);
+      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */
+      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
+      HYPRE_BoomerAMGSetOldDefault(precond);
+      HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */
+      HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */
+      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
+      HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */
+      HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */
+
+      /* Set the PCG preconditioner */
+      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
+                          (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);
+
+      /* Now setup and solve! */
+      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
+      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
+
+      /* Run info - needed logging turned on */
+      HYPRE_PCGGetNumIterations(solver, &num_iterations);
+      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
+      if (myid == 0)
+      {
+         printf("\n");
+         printf("Iterations = %lld\n", (long long int) num_iterations);
+         printf("Final Relative Residual Norm = %e\n", final_res_norm);
+         printf("\n");
+      }
+
+      /* Destroy solver and preconditioner */
+      HYPRE_ParCSRPCGDestroy(solver);
+      HYPRE_BoomerAMGDestroy(precond);
+   }
+   /* PCG with Parasails Preconditioner */
+   else if (solver_id == 8)
+   {
+      HYPRE_Int num_iterations;
+      double final_res_norm;
+
+      int      sai_max_levels = 1;
+      double   sai_threshold = 0.1;
+      double   sai_filter = 0.05;
+      int      sai_sym = 1;
+
+      /* Create solver */
+      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
+      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
+      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
+      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
+      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
+
+      /* Now set up the ParaSails preconditioner and specify any parameters */
+      HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels);
+      HYPRE_ParaSailsSetFilter(precond, sai_filter);
+      HYPRE_ParaSailsSetSym(precond, sai_sym);
+      HYPRE_ParaSailsSetLogging(precond, 3);
+
+      /* Set the PCG preconditioner */
+      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
+                          (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond);
+
+      /* Now setup and solve! */
+      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
+      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
+
+
+      /* Run info - needed logging turned on */
+      HYPRE_PCGGetNumIterations(solver, &num_iterations);
+      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
+      if (myid == 0)
+      {
+         printf("\n");
+         printf("Iterations = %lld\n", (long long int) num_iterations);
+         printf("Final Relative Residual Norm = %e\n", final_res_norm);
+         printf("\n");
+      }
+
+      /* Destory solver and preconditioner */
+      HYPRE_ParCSRPCGDestroy(solver);
+      HYPRE_ParaSailsDestroy(precond);
+   }
+   /* Flexible GMRES with  AMG Preconditioner */
+   else if (solver_id == 61)
+   {
+      HYPRE_Int num_iterations;
+      double final_res_norm;
+      int    restart = 30;
+      int    modify = 1;
+
+
+      /* Create solver */
+      HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver);
+
+      /* Set some parameters (See Reference Manual for more parameters) */
+      HYPRE_FlexGMRESSetKDim(solver, restart);
+      HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */
+      HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */
+      HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */
+      HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */
+
+
+      /* Now set up the AMG preconditioner and specify any parameters */
+      HYPRE_BoomerAMGCreate(&precond);
+      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */
+      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
+      HYPRE_BoomerAMGSetOldDefault(precond);
+      HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid relaxation */
+      HYPRE_BoomerAMGSetRelaxOrder(precond, 1); /* Uses C/F relaxation */
+      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
+      HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */
+      HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */
+
+      /* Set the FlexGMRES preconditioner */
+      HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
+                                (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);
+
+
+      if (modify)
+      {
+         /* this is an optional call  - if you don't call it, hypre_FlexGMRESModifyPCDefault
+            is used - which does nothing.  Otherwise, you can define your own, similar to
+            the one used here */
+         HYPRE_FlexGMRESSetModifyPC(
+            solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample);
+      }
+
+
+      /* Now setup and solve! */
+      HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x);
+      HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x);
+
+      /* Run info - needed logging turned on */
+      HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations);
+      HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm);
+      if (myid == 0)
+      {
+         printf("\n");
+         printf("Iterations = %lld\n", (long long int) num_iterations);
+         printf("Final Relative Residual Norm = %e\n", final_res_norm);
+         printf("\n");
+      }
+
+      /* Destory solver and preconditioner */
+      HYPRE_ParCSRFlexGMRESDestroy(solver);
+      HYPRE_BoomerAMGDestroy(precond);
+
+   }
+   else
+   {
+      if (myid == 0) { printf("Invalid solver id specified.\n"); }
+   }
+
+   /* Save the solution for GLVis visualization, see vis/glvis-ex5.sh */
+   if (vis)
+   {
+      FILE *file;
+      char filename[255];
+
+      int nvalues = local_size;
+      int *rows = (int*) calloc(nvalues, sizeof(int));
+      double *values =  (double*) calloc(nvalues, sizeof(double));
+
+      for (i = 0; i < nvalues; i++)
+      {
+         rows[i] = ilower + i;
+      }
+
+      /* get the local solution */
+      HYPRE_IJVectorGetValues(x, nvalues, rows, values);
+
+      sprintf(filename, "%s.%06d", "vis/ex5.sol", myid);
+      if ((file = fopen(filename, "w")) == NULL)
+      {
+         printf("Error: can't open output file %s\n", filename);
+         HYPRE_Finalize();
+         MPI_Finalize();
+         exit(1);
+      }
+
+      /* save solution */
+      for (i = 0; i < nvalues; i++)
+      {
+         fprintf(file, "%.14e\n", values[i]);
+      }
+
+      fflush(file);
+      fclose(file);
+
+      free(rows);
+      free(values);
+
+      /* save global finite element mesh */
+      if (myid == 0)
+      {
+         GLVis_PrintGlobalSquareMesh("vis/ex5.mesh", n - 1);
+      }
+   }
+
+   /* Clean up */
+   HYPRE_IJMatrixDestroy(A);
+   HYPRE_IJVectorDestroy(b);
+   HYPRE_IJVectorDestroy(x);
+
+   /* Finalize HYPRE */
+   HYPRE_Finalize();
+
+   /* Finalize MPI*/
+   MPI_Finalize();
+
+   return (0);
+}
+
+/*--------------------------------------------------------------------------
+   hypre_FlexGMRESModifyPCAMGExample -
+
+    This is an example (not recommended)
+   of how we can modify things about AMG that
+   affect the solve phase based on how FlexGMRES is doing...For
+   another preconditioner it may make sense to modify the tolerance..
+
+ *--------------------------------------------------------------------------*/
+
+int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, int iterations,
+                                      double rel_residual_norm)
+{
+
+
+   if (rel_residual_norm > .1)
+   {
+      HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 10);
+   }
+   else
+   {
+      HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver)precond_data, 1);
+   }
+
+
+   return 0;
+}
diff --git a/src/Examples/Hypre/tnl-hypre-ex5.cpp b/src/Examples/Hypre/tnl-hypre-ex5.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4a55476ac1c13be243dca546879634b268daa662
--- /dev/null
+++ b/src/Examples/Hypre/tnl-hypre-ex5.cpp
@@ -0,0 +1,405 @@
+/**
+ * Description:
+ *
+ * This example solves the 2-D Laplacian problem with zero boundary conditions
+ * on an n x n grid.  The number of unknowns is N=n^2.  The standard 5-point
+ * stencil is used, and we solve for the interior nodes only.
+ *
+ * The example is based on the "ex5.c" example.  It demonstrates how the C code
+ * can be ported to C++ and TNL.  We recommend comparing this example the
+ * original one.
+ *
+ * Compile with: use the install script from TNL
+ *
+ * Sample run:   OMP_NUM_THREADS=1 mpirun -np 4 tnl-hypre-ex5
+ */
+
+#include <cstdio>
+#include <string>
+
+#include <TNL/Math.h>
+#include <TNL/MPI.h>
+#include <TNL/Hypre.h>
+#include <TNL/Containers/HypreParVector.h>
+#include <TNL/Matrices/HypreParCSRMatrix.h>
+#include <TNL/Solvers/Linear/Hypre.h>
+
+#include "vis.c"
+
+int
+hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm );
+
+int
+main( int argc, char* argv[] )
+{
+   // Initialize MPI
+   TNL::MPI::ScopedInitializer mpi( argc, argv );
+   const int myid = TNL::MPI::GetRank( MPI_COMM_WORLD );
+   const int num_procs = TNL::MPI::GetSize( MPI_COMM_WORLD );
+
+   // Initialize HYPRE and set some global options, notably HYPRE_SetSpGemmUseCusparse(0);
+   TNL::Hypre hypre;
+
+   // Default problem parameters
+   int n = 33;
+   int solver_id = 0;
+   int vis = 0;
+
+   // Parse the command line
+   {
+      int arg_index = 0;
+      int print_usage = 0;
+
+      while( arg_index < argc ) {
+         if( std::string( argv[ arg_index ] ) == "-n" ) {
+            arg_index++;
+            n = std::stoi( argv[ arg_index++ ] );
+         }
+         else if( std::string( argv[ arg_index ] ) == "-solver" ) {
+            arg_index++;
+            solver_id = std::stoi( argv[ arg_index++ ] );
+         }
+         else if( std::string( argv[ arg_index ] ) == "-vis" ) {
+            arg_index++;
+            vis = 1;
+         }
+         else if( std::string( argv[ arg_index ] ) == "-help" ) {
+            print_usage = 1;
+            break;
+         }
+         else {
+            arg_index++;
+         }
+      }
+
+      if( print_usage && myid == 0 ) {
+         std::cerr << "\n"
+                      "Usage: " << argv[ 0 ] << " [<options>]\n"
+                      "\n"
+                      "  -n <n>              : problem size in each direction (default: 33)\n"
+                      "  -solver <ID>        : solver ID\n"
+                      "                        0  - AMG (default) \n"
+                      "                        1  - AMG-PCG\n"
+                      "                        8  - ParaSails-PCG\n"
+                      "                        50 - PCG\n"
+                      "                        61 - AMG-FlexGMRES\n"
+                      "  -vis                : save the solution for GLVis visualization\n"
+                   << std::endl;
+      }
+
+      if( print_usage )
+         return EXIT_SUCCESS;
+   }
+
+   // Set the problem sizes
+   // Preliminaries: we want at least one rank per row
+   if( n * n < num_procs )
+      n = std::sqrt( num_procs ) + 1;
+   const int N = n * n;               // global number of rows
+   const double h = 1.0 / ( n + 1 );  // mesh cell size
+
+   /* Each rank knows only of its own rows - the range is denoted by ilower
+      and upper.  Here we partition the rows. We account for the fact that
+      N may not divide evenly by the number of ranks. */
+   HYPRE_Int local_size = N / num_procs;
+   HYPRE_Int extra = N - local_size * num_procs;
+
+   HYPRE_Int ilower = local_size * myid;
+   ilower += TNL::min( myid, extra );
+
+   HYPRE_Int iupper = local_size * ( myid + 1 );
+   iupper += TNL::min( myid + 1, extra );
+   iupper = iupper - 1;
+
+   // How many rows do I have?
+   local_size = iupper - ilower + 1;
+
+   // Let each rank create its local matrix in the CSR format in TNL
+   using CSR = TNL::Matrices::SparseMatrix< double, TNL::HYPRE_Device, HYPRE_Int >;
+   CSR A_local;
+   A_local.setDimensions( local_size, N );
+
+   // Allocate row capacities - this must match exactly the sparsity pattern of
+   // the matrix
+   typename CSR::RowsCapacitiesType capacities;
+   capacities.setSize( local_size );
+   auto capacities_view = capacities.getView();
+   TNL::Algorithms::ParallelFor< TNL::HYPRE_Device >::exec( ilower, iupper + 1,
+      [=] __cuda_callable__ ( HYPRE_Int i ) mutable
+      {
+         int nnz = 0;
+
+         // The left identity block: position i-n
+         if( i - n >= 0 )
+            nnz++;
+
+         // The left -1: position i-1
+         if( i % n )
+            nnz++;
+
+         // The diagonal: position i
+         nnz++;
+
+         // The right -1: position i+1
+         if( ( i + 1 ) % n )
+            nnz++;
+
+         // The right identity block: position i+n
+         if( i + n < N )
+            nnz++;
+
+         // The row index must be converted from global to local
+         capacities_view[ i - ilower ] = nnz;
+      } );
+   A_local.setRowCapacities( capacities );
+
+   /* Now assemble the local matrix. Each row has at most 5 entries. For
+    * example, if n=3:
+    *   A = [M -I 0; -I M -I; 0 -I M]
+    *   M = [4 -1 0; -1 4 -1; 0 -1 4]
+    */
+   A_local.forAllRows( [=] __cuda_callable__ ( typename CSR::RowView& row ) mutable {
+         // The row index must be converted from local to global
+         const HYPRE_Int i = ilower + row.getRowIndex();
+         int nnz = 0;
+
+         // The left identity block: position i-n
+         if( i - n >= 0 )
+            row.setElement( nnz++, i - n, -1.0 );
+
+         // The left -1: position i-1
+         if( i % n )
+            row.setElement( nnz++, i - 1, -1.0 );
+
+         // The diagonal: position i
+         row.setElement( nnz++, i, 4.0 );
+
+         // The right -1: position i+1
+         if( ( i + 1 ) % n )
+            row.setElement( nnz++, i + 1, -1.0 );
+
+         // The right identity block: position i+n
+         if( i + n < N )
+            row.setElement( nnz++, i + n, -1.0 );
+      } );
+
+   // Bind the TNL matrix to HypreCSR
+   TNL::Matrices::HypreCSRMatrix A_local_hypre;
+   A_local_hypre.bind( A_local );
+
+   // Assemble the distributed matrix in Hypre from the local blocks
+   // Note that this is a square matrix, so we indicate the row partition
+   // size twice (since number of rows = number of cols)
+   using HypreParCSR = TNL::Matrices::HypreParCSRMatrix;
+   HypreParCSR parcsr_A = HypreParCSR::fromLocalBlocks( MPI_COMM_WORLD, N, N, {ilower, iupper + 1}, {ilower, iupper + 1}, A_local_hypre );
+
+   // Deallocate the local matrix since it is not needed anymore in this example
+   A_local_hypre.reset();
+   A_local.reset();
+
+
+   // Create the rhs and solution vectors
+   TNL::Containers::HypreParVector par_b;
+   TNL::Containers::HypreParVector par_x;
+
+   par_b.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD );
+   par_x.setDistribution( {ilower, iupper + 1}, 0, N, MPI_COMM_WORLD );
+
+   // Set the rhs values to h^2 and the solution to zero
+   par_b.setValue( h * h );
+   par_x.setValue( 0.0 );
+
+
+   // Choose a solver and solve the system
+
+   // AMG
+   if( solver_id == 0 ) {
+      // Create the solver
+      TNL::Solvers::Linear::HypreBoomerAMG solver;
+
+      // Set the matrix of the linear system
+      // WARNING: setMatrix resets the preconditioner, including setting
+      //          default options.
+      // NOTE: The wrapper class sets its own default options that are
+      //       different from Hypre. The overriding settings below result in
+      //       the same state as the hypre-ex5.c example.
+      solver.setMatrix( parcsr_A );
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_BoomerAMGSetPrintLevel( solver, 3 );    // Print solve info + parameters
+      HYPRE_BoomerAMGSetOldDefault( solver );       // Falgout coarsening with modified classical interpolation
+      HYPRE_BoomerAMGSetRelaxType( solver, 6 );     // Sym. G-S/Jacobi hybrid relaxation
+      HYPRE_BoomerAMGSetRelaxOrder( solver, 1 );    // Uses C/F relaxation
+      HYPRE_BoomerAMGSetNumSweeps( solver, 1 );     // Sweeeps on each level
+      HYPRE_BoomerAMGSetTol( solver, 1e-7 );        // Convergence tolerance
+      HYPRE_BoomerAMGSetMaxIter( solver, 20 );      // Use as solver: max iterations
+      HYPRE_BoomerAMGSetAggNumLevels( solver, 0 );  // number of aggressive coarsening levels
+
+      // Solve the linear system (calls AMG setup, solve, and prints final residual norm)
+      solver.solve( par_b, par_x );
+   }
+   // PCG
+   else if( solver_id == 50 ) {
+      // Create the solver
+      TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD );
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_PCGSetMaxIter( solver, 1000 );  // max iterations
+      HYPRE_PCGSetTol( solver, 1e-7 );      // convergence tolerance
+      HYPRE_PCGSetTwoNorm( solver, 1 );     // use the two norm as the stopping criteria
+      HYPRE_PCGSetPrintLevel( solver, 2 );  // prints out the iteration info
+
+      // Set the matrix of the linear system
+      solver.setMatrix( parcsr_A );
+
+      // Ignore errors returned from Hypre functions (e.g. when PCG does not
+      // converge within the maximum iterations limit).
+      solver.setErrorMode( solver.WARN_HYPRE_ERRORS );
+
+      // Solve the linear system (calls PCG setup, solve, and prints final residual norm)
+      solver.solve( par_b, par_x );
+   }
+   // PCG with AMG preconditioner
+   else if( solver_id == 1 ) {
+      // Create the solver
+      TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD );
+
+      // Create the preconditioner
+      TNL::Solvers::Linear::HypreBoomerAMG precond;
+
+      // Set the PCG preconditioner
+      solver.setPreconditioner( precond );
+
+      // Set the matrix of the linear system
+      // WARNING: setMatrix resets the preconditioner, including setting
+      //          default options.
+      solver.setMatrix( parcsr_A );
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_BoomerAMGSetPrintLevel( precond, 1 );    // Print setup info + parameters
+      HYPRE_BoomerAMGSetOldDefault( precond );       // Falgout coarsening with modified classical interpolation
+      HYPRE_BoomerAMGSetRelaxType( precond, 6 );     // Sym G.S./Jacobi hybrid relaxation
+      HYPRE_BoomerAMGSetRelaxOrder( precond, 1 );    // Uses C/F relaxation
+      HYPRE_BoomerAMGSetAggNumLevels( precond, 0 );  // number of aggressive coarsening levels
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_PCGSetMaxIter( solver, 1000 );  // max iterations
+      HYPRE_PCGSetTol( solver, 1e-7 );      // convergence tolerance
+      HYPRE_PCGSetTwoNorm( solver, 1 );     // use the two norm as the stopping criteria
+      HYPRE_PCGSetPrintLevel( solver, 2 );  // prints out the iteration info
+
+      // Solve the linear system (calls PCG setup, solve, and prints final residual norm)
+      solver.solve( par_b, par_x );
+   }
+   // PCG with ParaSails preconditioner
+   else if( solver_id == 8 ) {
+      // Create the solver
+      TNL::Solvers::Linear::HyprePCG solver( MPI_COMM_WORLD );
+
+      // Create the preconditioner
+      TNL::Solvers::Linear::HypreParaSails precond( MPI_COMM_WORLD );
+
+      // Set the PCG preconditioner
+      solver.setPreconditioner( precond );
+
+      // Set the matrix of the linear system
+      // WARNING: setMatrix resets the preconditioner, including setting
+      //          default options.
+      solver.setMatrix( parcsr_A );
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_PCGSetMaxIter( solver, 1000 );  // max iterations
+      HYPRE_PCGSetTol( solver, 1e-7 );      // convergence tolerance
+      HYPRE_PCGSetTwoNorm( solver, 1 );     // use the two norm as the stopping criteria
+      HYPRE_PCGSetPrintLevel( solver, 2 );  // prints out the iteration info
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_ParaSailsSetParams( precond, 0.1, 1 );  // threshold and max levels
+      HYPRE_ParaSailsSetFilter( precond, 0.05 );
+      HYPRE_ParaSailsSetSym( precond, 1 );
+
+      // Solve the linear system (calls PCG setup, solve, and prints final residual norm)
+      solver.solve( par_b, par_x );
+   }
+   // Flexible GMRES with AMG preconditioner
+   else if( solver_id == 61 ) {
+      // Create the solver
+      TNL::Solvers::Linear::HypreFlexGMRES solver( MPI_COMM_WORLD );
+
+      // Create the preconditioner
+      TNL::Solvers::Linear::HypreBoomerAMG precond;
+
+      // Set the FlexGMRES preconditioner
+      solver.setPreconditioner( precond );
+
+      // Set the matrix of the linear system
+      // WARNING: setMatrix resets the preconditioner, including setting
+      //          default options.
+      solver.setMatrix( parcsr_A );
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_FlexGMRESSetKDim( solver, 30 );       // restart parameter
+      HYPRE_FlexGMRESSetMaxIter( solver, 1000 );  // max iterations
+      HYPRE_FlexGMRESSetTol( solver, 1e-7 );      // convergence tolerance
+      HYPRE_FlexGMRESSetPrintLevel( solver, 2 );  // print solve info
+
+      // Set some parameters (See Reference Manual for more parameters)
+      HYPRE_BoomerAMGSetPrintLevel( precond, 1 );    // Print setup info + parameters
+      HYPRE_BoomerAMGSetOldDefault( precond );       // Falgout coarsening with modified classical interpolation
+      HYPRE_BoomerAMGSetRelaxType( precond, 6 );     // Sym G.S./Jacobi hybrid relaxation
+      HYPRE_BoomerAMGSetRelaxOrder( precond, 1 );    // Uses C/F relaxation
+      HYPRE_BoomerAMGSetAggNumLevels( precond, 0 );  // number of aggressive coarsening levels
+
+      // This is an optional call - if you don't call it,
+      // hypre_FlexGMRESModifyPCDefault is used - which does nothing.
+      // Otherwise, you can define your own, similar to the one used here
+      HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample );
+
+      // Solve the linear system (calls FlexGMRES setup, solve, and prints final residual norm)
+      solver.solve( par_b, par_x );
+   }
+   else if( myid == 0 ) {
+      std::cerr << "Invalid solver id specified." << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Save the solution for GLVis visualization, see glvis-ex5.sh
+   if( vis ) {
+      char filename[ 255 ];
+      sprintf( filename, "%s.%06d", "vis_tnl/ex5.sol", myid );
+      FILE* file = fopen( filename, "w" );
+      if( file == nullptr ) {
+         printf( "Error: can't open output file %s\n", filename );
+         return EXIT_FAILURE;
+      }
+
+      // Save the solution
+      const auto local_x = par_x.getConstLocalView();
+      for( HYPRE_Int i = 0; i < local_size; i++ )
+         fprintf( file, "%.14e\n", local_x[ i ] );
+
+      fflush( file );
+      fclose( file );
+
+      // Save the global finite element mesh
+      if( myid == 0 )
+         GLVis_PrintGlobalSquareMesh( "vis_tnl/ex5.mesh", n - 1 );
+   }
+
+   return EXIT_SUCCESS;
+}
+
+/**
+ * This is an example (not recommended) of how we can modify things about AMG
+ * that affect the solve phase based on how FlexGMRES is doing... For another
+ * preconditioner it may make sense to modify the tolerance.
+ */
+int
+hypre_FlexGMRESModifyPCAMGExample( void* precond_data, int iterations, double rel_residual_norm )
+{
+   if( rel_residual_norm > .1 )
+      HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 10 );
+   else
+      HYPRE_BoomerAMGSetNumSweeps( (HYPRE_Solver) precond_data, 1 );
+   return 0;
+}
diff --git a/src/Examples/Hypre/verify-ex5.sh b/src/Examples/Hypre/verify-ex5.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6ca17d194d01a4859547298e58ba622c9256db6d
--- /dev/null
+++ b/src/Examples/Hypre/verify-ex5.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+
+# make output directories
+mkdir -p vis vis_tnl
+
+# dbg suffix for binaries
+#dbg=""
+dbg="-dbg"
+
+# problem size: N=n^2
+n=100
+
+# disable OpenMP for reproducibility
+export OMP_NUM_THREADS=1
+
+make_glvis() {
+    local dir="$1"
+    local sol="$dir/ex5.sol"
+
+    echo "FiniteElementSpace" > "$sol"
+    echo "FiniteElementCollection: H1_2D_P1" >> "$sol"
+    echo "VDim: 1" >> "$sol"
+    echo "Ordering: 0" >> "$sol"
+    echo "" >> "$sol"
+    find "$dir" -name "ex5.sol.??????" | sort | xargs cat >> "$sol"
+}
+
+verify() {
+    make_glvis vis
+    make_glvis vis_tnl
+    diff "vis/ex5.sol" "vis_tnl/ex5.sol"
+    rm -f vis/ex5.sol* vis_tnl/ex5.sol*
+}
+
+# BoomerAMG
+mpirun hypre-ex5$dbg -vis -n $n -solver 0
+mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 0
+verify
+
+# PCG
+mpirun hypre-ex5$dbg -vis -n $n -solver 50
+mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 50
+verify
+
+# PCG with AMG
+mpirun hypre-ex5$dbg -vis -n $n -solver 1
+mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 1
+verify
+
+# PCG with ParaSails
+mpirun hypre-ex5$dbg -vis -n $n -solver 8
+mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 8
+verify
+
+# FlexGMRES with AMG
+mpirun hypre-ex5$dbg -vis -n $n -solver 61
+mpirun tnl-hypre-ex5$dbg -vis -n $n -solver 61
+verify
diff --git a/src/Examples/Hypre/vis.c b/src/Examples/Hypre/vis.c
new file mode 100644
index 0000000000000000000000000000000000000000..b051fb90e8e6f4bf1ddd30461d1a8482f2fccdb2
--- /dev/null
+++ b/src/Examples/Hypre/vis.c
@@ -0,0 +1,60 @@
+/******************************************************************************
+ * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+ * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ ******************************************************************************/
+
+/* Save a structured n x n mesh of square elements on the unit square into a
+   GLVis mesh file with the given name. */
+#include <math.h>
+void GLVis_PrintGlobalSquareMesh(const char *meshfile, int n)
+{
+   FILE *file;
+
+   int Dim = 2;
+   int NumOfVertices = (n + 1) * (n + 1);
+   int NumOfElements = n * n;
+
+   int i, j;
+   double x, y;
+   double h = 1.0 / n;
+
+   if ((file = fopen(meshfile, "w")) == NULL)
+   {
+      printf("Error: can't open output file %s\n", meshfile);
+      exit(1);
+   }
+
+   /* mesh header */
+   fprintf(file, "MFEM mesh v1.0\n");
+   fprintf(file, "\ndimension\n");
+   fprintf(file, "%d\n", Dim);
+
+   /* mesh elements */
+   fprintf(file, "\nelements\n");
+   fprintf(file, "%d\n", NumOfElements);
+   for (j = 0; j < n; j++)
+      for (i = 0; i < n; i++)
+         fprintf(file, "1 3 %d %d %d %d\n", i + j * (n + 1), i + 1 + j * (n + 1),
+                 i + 1 + (j + 1) * (n + 1), i + (j + 1) * (n + 1));
+
+   /* boundary will be generated by GLVis */
+   fprintf(file, "\nboundary\n");
+   fprintf(file, "0\n");
+
+   /* mesh vertices */
+   fprintf(file, "\nvertices\n");
+   fprintf(file, "%d\n", NumOfVertices);
+   fprintf(file, "%d\n", Dim);
+   for (j = 0; j < n + 1; j++)
+      for (i = 0; i < n + 1; i++)
+      {
+         x = i * h;
+         y = j * h;
+         fprintf(file, "%.14e %.14e\n", x, y);
+      }
+
+   fflush(file);
+   fclose(file);
+}
diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h
index e4d98a1641de3ac3f01dfad374c9779f7c15d427..6216a16113748f1d053874a9bd65d24223e796c3 100644
--- a/src/TNL/Algorithms/Segments/CSRView.h
+++ b/src/TNL/Algorithms/Segments/CSRView.h
@@ -29,7 +29,7 @@ public:
    using IndexType = std::remove_const_t< Index >;
    using KernelType = Kernel;
    using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >;
-   using ConstOffsetsView = typename Containers::Vector< Index, DeviceType, IndexType >::ConstViewType;
+   using ConstOffsetsView = typename OffsetsView::ConstViewType;
    using KernelView = typename Kernel::ViewType;
    using ViewType = CSRView;
    template< typename Device_, typename Index_ >
@@ -167,6 +167,18 @@ public:
    SegmentsPrinter< CSRView, Fetch >
    print( Fetch&& fetch ) const;
 
+   OffsetsView
+   getOffsets()
+   {
+      return offsets;
+   }
+
+   ConstOffsetsView
+   getOffsets() const
+   {
+      return offsets.getConstView();
+   }
+
    KernelType&
    getKernel()
    {
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp
index 473c778cb2cb3b2247df9510663163e6e889ff50..b5158a5d77c1731749332be1a8423da11a40b8cd 100644
--- a/src/TNL/Containers/Array.hpp
+++ b/src/TNL/Containers/Array.hpp
@@ -134,7 +134,7 @@ template< typename Value, typename Device, typename Index, typename Allocator >
 std::string
 Array< Value, Device, Index, Allocator >::getSerializationType()
 {
-   return detail::ArrayIO< Value, Device, Index >::getSerializationType();
+   return detail::ArrayIO< Value, Index, Allocator >::getSerializationType();
 }
 
 template< typename Value, typename Device, typename Index, typename Allocator >
@@ -579,7 +579,7 @@ operator<<( File& file, const Array< Value, Device, Index, Allocator >& array )
 {
    using IO = detail::ArrayIO< Value, Index, Allocator >;
    saveObjectType( file, IO::getSerializationType() );
-   const Index size = array.getSize();
+   const std::size_t size = array.getSize();
    file.save( &size );
    IO::save( file, array.getData(), array.getSize() );
    return file;
@@ -603,11 +603,9 @@ operator>>( File& file, Array< Value, Device, Index, Allocator >& array )
    if( type != IO::getSerializationType() )
       throw Exceptions::FileDeserializationError(
          file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." );
-   Index _size;
-   file.load( &_size );
-   if( _size < 0 )
-      throw Exceptions::FileDeserializationError( file.getFileName(), "invalid array size: " + std::to_string( _size ) );
-   array.setSize( _size );
+   std::size_t size;
+   file.load( &size );
+   array.setSize( size );
    IO::load( file, array.getData(), array.getSize() );
    return file;
 }
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp
index a166b4ca250b9f9973a44b74db1617300bc20cfb..9164e1fdedd45367f16a4ffd6c54837d485c13d4 100644
--- a/src/TNL/Containers/ArrayView.hpp
+++ b/src/TNL/Containers/ArrayView.hpp
@@ -355,7 +355,7 @@ operator<<( File& file, const ArrayView< Value, Device, Index > view )
 {
    using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >;
    saveObjectType( file, IO::getSerializationType() );
-   const Index size = view.getSize();
+   const std::size_t size = view.getSize();
    file.save( &size );
    IO::save( file, view.getData(), view.getSize() );
    return file;
@@ -379,11 +379,11 @@ operator>>( File& file, ArrayView< Value, Device, Index > view )
    if( type != IO::getSerializationType() )
       throw Exceptions::FileDeserializationError(
          file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." );
-   Index _size;
-   file.load( &_size );
-   if( _size != view.getSize() )
+   std::size_t size;
+   file.load( &size );
+   if( size != static_cast< std::size_t >( view.getSize() ) )
       throw Exceptions::FileDeserializationError( file.getFileName(),
-                                                  "invalid array size: " + std::to_string( _size ) + " (expected "
+                                                  "invalid array size: " + std::to_string( size ) + " (expected "
                                                      + std::to_string( view.getSize() ) + ")." );
    IO::load( file, view.getData(), view.getSize() );
    return file;
diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h
index 1f40b9c4edb64d7b916082ddd6cc8d92ac33adb9..c84ec014c266ad4a8aa6369fa6fb7a9ce1dbfb56 100644
--- a/src/TNL/Containers/DistributedArray.h
+++ b/src/TNL/Containers/DistributedArray.h
@@ -277,6 +277,12 @@ public:
    void
    forElements( IndexType begin, IndexType end, Function&& f ) const;
 
+   void
+   loadFromGlobalFile( const String& fileName, bool allowCasting = false );
+
+   void
+   loadFromGlobalFile( File& file, bool allowCasting = false );
+
 protected:
    ViewType view;
    LocalArrayType localData;
diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp
index 4dc65636e17fcdb1d2878bd2c5efa6e02b909be0..bf86ea9bad84abc0d00d6f96123cfc62ca695b8c 100644
--- a/src/TNL/Containers/DistributedArray.hpp
+++ b/src/TNL/Containers/DistributedArray.hpp
@@ -133,7 +133,7 @@ void
 DistributedArray< Value, Device, Index, Allocator >::setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer,
                                                                       int valuesPerElement )
 {
-   view.setSynchronizer( synchronizer, valuesPerElement );
+   view.setSynchronizer( std::move( synchronizer ), valuesPerElement );
 }
 
 template< typename Value, typename Device, typename Index, typename Allocator >
@@ -316,5 +316,19 @@ DistributedArray< Value, Device, Index, Allocator >::forElements( IndexType begi
    view.forElements( begin, end, f );
 }
 
+template< typename Value, typename Device, typename Index, typename Allocator >
+void
+DistributedArray< Value, Device, Index, Allocator >::loadFromGlobalFile( const String& fileName, bool allowCasting )
+{
+   view.loadFromGlobalFile( fileName, allowCasting );
+}
+
+template< typename Value, typename Device, typename Index, typename Allocator >
+void
+DistributedArray< Value, Device, Index, Allocator >::loadFromGlobalFile( File& file, bool allowCasting )
+{
+   view.loadFromGlobalFile( file, allowCasting );
+}
+
 }  // namespace Containers
 }  // namespace TNL
diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h
index dfc7e490463204bcef6779ef817ed078f3982129..7476a7e0294897aaf63c69c094d7dcabbf8b0115 100644
--- a/src/TNL/Containers/DistributedArrayView.h
+++ b/src/TNL/Containers/DistributedArrayView.h
@@ -253,6 +253,12 @@ public:
    void
    forElements( IndexType begin, IndexType end, Function&& f ) const;
 
+   void
+   loadFromGlobalFile( const String& fileName, bool allowCasting = false );
+
+   void
+   loadFromGlobalFile( File& file, bool allowCasting = false );
+
 protected:
    LocalRangeType localRange;
    IndexType ghosts = 0;
diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp
index f995f63b0455d76e4f98dde43c1d8b8c1a2d0c74..a583d2958a34a755f79203065419c89bcda6e7b1 100644
--- a/src/TNL/Containers/DistributedArrayView.hpp
+++ b/src/TNL/Containers/DistributedArrayView.hpp
@@ -151,7 +151,7 @@ void
 DistributedArrayView< Value, Device, Index >::setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer,
                                                                int valuesPerElement )
 {
-   this->synchronizer = synchronizer;
+   this->synchronizer = std::move( synchronizer );
    this->valuesPerElement = valuesPerElement;
 }
 
@@ -377,5 +377,35 @@ DistributedArrayView< Value, Device, Index >::forElements( IndexType begin, Inde
    localData.forElements( localBegin, localEnd, local_f );
 }
 
+template< typename Value, typename Device, typename Index >
+void
+DistributedArrayView< Value, Device, Index >::loadFromGlobalFile( const String& fileName, bool allowCasting )
+{
+   File file( fileName, std::ios_base::in );
+   loadFromGlobalFile( file, allowCasting );
+}
+
+template< typename Value, typename Device, typename Index >
+void
+DistributedArrayView< Value, Device, Index >::loadFromGlobalFile( File& file, bool allowCasting )
+{
+   using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >;
+   const std::string type = getObjectType( file );
+   const auto parsedType = parseObjectType( type );
+
+   if( ! allowCasting && type != IO::getSerializationType() )
+      throw Exceptions::FileDeserializationError(
+         file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." );
+
+   std::size_t elementsInFile;
+   file.load( &elementsInFile );
+
+   if( allowCasting )
+      IO::loadSubrange(
+         file, elementsInFile, localRange.getBegin(), localData.getData(), localData.getSize(), parsedType[ 1 ] );
+   else
+      IO::loadSubrange( file, elementsInFile, localRange.getBegin(), localData.getData(), localData.getSize() );
+}
+
 }  // namespace Containers
 }  // namespace TNL
diff --git a/src/TNL/Containers/HypreParVector.h b/src/TNL/Containers/HypreParVector.h
new file mode 100644
index 0000000000000000000000000000000000000000..20258e3dd9ba409d2c84248568893200ac0daaa9
--- /dev/null
+++ b/src/TNL/Containers/HypreParVector.h
@@ -0,0 +1,375 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <TNL/Containers/HypreVector.h>
+   #include <TNL/Containers/DistributedVector.h>
+
+namespace TNL {
+namespace Containers {
+
+/**
+ * \brief Wrapper for Hypre's parallel vector.
+ *
+ * Links to upstream sources:
+ * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/par_vector.h
+ * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/par_vector.c
+ * - https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/_hypre_parcsr_mv.h (catch-all interface)
+ *
+ * \ingroup Hypre
+ */
+class HypreParVector
+{
+public:
+   using RealType = HYPRE_Real;
+   using ValueType = RealType;
+   using DeviceType = HYPRE_Device;
+   using IndexType = HYPRE_Int;
+   using LocalRangeType = Subrange< IndexType >;
+
+   using VectorType = Containers::DistributedVector< RealType, DeviceType, IndexType >;
+   using ViewType = typename VectorType::ViewType;
+   using ConstViewType = typename VectorType::ConstViewType;
+
+   using LocalVectorType = Containers::Vector< RealType, DeviceType, IndexType >;
+   using LocalViewType = typename LocalVectorType::ViewType;
+   using ConstLocalViewType = typename LocalVectorType::ConstViewType;
+
+   using SynchronizerType = typename ViewType::SynchronizerType;
+
+   // default constructor, no underlying \e hypre_ParVector is created.
+   HypreParVector() = default;
+
+   // TODO: behavior should depend on "owns_data" (shallow vs deep copy)
+   HypreParVector( const HypreParVector& other ) = delete;
+
+   HypreParVector( HypreParVector&& other ) noexcept
+   : v( other.v ), owns_handle( other.owns_handle ), localData( std::move( other.localData ) ), ghosts( other.ghosts ),
+     synchronizer( std::move( other.synchronizer ) ), valuesPerElement( other.valuesPerElement )
+   {
+      other.v = nullptr;
+   }
+
+   // TODO should do a deep copy
+   HypreParVector&
+   operator=( const HypreParVector& other ) = delete;
+
+   HypreParVector&
+   operator=( HypreParVector&& other ) noexcept
+   {
+      v = other.v;
+      other.v = nullptr;
+      owns_handle = other.owns_handle;
+      localData = std::move( other.localData );
+      ghosts = other.ghosts;
+      synchronizer = std::move( other.synchronizer );
+      valuesPerElement = other.valuesPerElement;
+      return *this;
+   }
+
+   //! \brief Initialization by raw data
+   HypreParVector( const LocalRangeType& localRange,
+                   IndexType ghosts,
+                   IndexType globalSize,
+                   MPI_Comm communicator,
+                   LocalViewType localData )
+   {
+      bind( localRange, ghosts, globalSize, communicator, localData );
+   }
+
+   /**
+    * \brief Convert Hypre's format to HypreParVector
+    *
+    * \param take_ownership indicates if the vector should take ownership of
+    * the handle, i.e. whether to call \e hypre_VectorDestroy when it does
+    * not need it anymore.
+    */
+   explicit HypreParVector( hypre_ParVector* handle, bool take_ownership = true )
+   {
+      bind( handle, take_ownership );
+   }
+
+   //! Typecasting to Hypre's \e hypre_ParVector* (which is equivalent to \e HYPRE_ParVector*)
+   operator hypre_ParVector*() const
+   {
+      return v;
+   }
+
+   ~HypreParVector()
+   {
+      reset();
+   }
+
+   LocalRangeType
+   getLocalRange() const
+   {
+      if( v == nullptr )
+         return {};
+      return { hypre_ParVectorPartitioning( v )[ 0 ], hypre_ParVectorPartitioning( v )[ 1 ] };
+   }
+
+   IndexType
+   getGhosts() const
+   {
+      return ghosts;
+   }
+
+   //! \brief Return the MPI communicator.
+   MPI_Comm
+   getCommunicator() const
+   {
+      if( v == nullptr )
+         return MPI_COMM_NULL;
+      return hypre_ParVectorComm( v );
+   }
+
+   //! \brief Returns the global size of the vector.
+   HYPRE_Int
+   getSize() const
+   {
+      if( v == nullptr )
+         return 0;
+      return hypre_ParVectorGlobalSize( v );
+   }
+
+   ConstLocalViewType
+   getConstLocalView() const
+   {
+      return { localData.getData(), getLocalRange().getSize() };
+   }
+
+   LocalViewType
+   getLocalView()
+   {
+      return { localData.getData(), getLocalRange().getSize() };
+   }
+
+   ConstLocalViewType
+   getConstLocalViewWithGhosts() const
+   {
+      return localData.getConstView();
+   }
+
+   LocalViewType
+   getLocalViewWithGhosts()
+   {
+      return localData.getView();
+   }
+
+   ConstViewType
+   getConstView() const
+   {
+      return { getLocalRange(), ghosts, getSize(), getCommunicator(), getConstLocalViewWithGhosts() };
+   }
+
+   ViewType
+   getView()
+   {
+      return { getLocalRange(), ghosts, getSize(), getCommunicator(), getLocalViewWithGhosts() };
+   }
+
+   /**
+    * \brief Drop previously set data (deallocate if the vector was the owner)
+    * and bind to the given data (i.e., the vector does not become the owner).
+    */
+   void
+   bind( const LocalRangeType& localRange,
+         IndexType ghosts,
+         IndexType globalSize,
+         MPI_Comm communicator,
+         LocalViewType localData )
+   {
+      TNL_ASSERT_EQ( localData.getSize(),
+                     localRange.getSize() + ghosts,
+                     "The local array size does not match the local range of the distributed array." );
+      TNL_ASSERT_GE( ghosts, 0, "The ghosts count must be non-negative." );
+
+      // drop/deallocate the current data
+      reset();
+
+      // create handle for the vector
+      IndexType partitioning[ 2 ];
+      partitioning[ 0 ] = localRange.getBegin();
+      partitioning[ 1 ] = localRange.getEnd();
+      v = hypre_ParVectorCreate( communicator, globalSize, partitioning );
+
+      // set view data
+      this->localData.bind( localData );
+      hypre_ParVectorOwnsData( v ) = 0;
+      hypre_ParVectorLocalVector( v ) = this->localData;
+      hypre_ParVectorActualLocalSize( v ) = this->localData.getSize();
+
+      this->ghosts = ghosts;
+   }
+
+   void
+   bind( ViewType view )
+   {
+      bind( view.getLocalRange(), view.getGhosts(), view.getSize(), view.getCommunicator(), view.getLocalViewWithGhosts() );
+   }
+
+   void
+   bind( VectorType& vector )
+   {
+      bind( vector.getView() );
+   }
+
+   void
+   bind( HypreParVector& vector )
+   {
+      bind( vector.getLocalRange(),
+            vector.getGhosts(),
+            vector.getSize(),
+            vector.getCommunicator(),
+            vector.getLocalViewWithGhosts() );
+   }
+
+   /**
+    * \brief Convert Hypre's format to HypreParVector
+    *
+    * \param take_ownership indicates if the vector should take ownership of
+    * the handle, i.e. whether to call \e hypre_VectorDestroy when it does
+    * not need it anymore.
+    */
+   void
+   bind( hypre_ParVector* handle, bool take_ownership = true )
+   {
+      // drop/deallocate the current data
+      if( v != nullptr )
+         hypre_ParVectorDestroy( v );
+
+      // set the handle and ownership flag
+      v = handle;
+      owns_handle = take_ownership;
+
+      // set view data
+      localData.bind( hypre_ParVectorLocalVector( v ) );
+      hypre_ParVectorOwnsData( v ) = 0;
+      hypre_ParVectorLocalVector( v ) = this->localData;
+      hypre_ParVectorActualLocalSize( v ) = this->localData.getSize();
+
+      const HYPRE_Int actual_size = hypre_ParVectorActualLocalSize( v );
+      const HYPRE_Int local_size = hypre_ParVectorPartitioning( v )[ 1 ] - hypre_ParVectorPartitioning( v )[ 0 ];
+      this->ghosts = actual_size - local_size;
+   }
+
+   //! \brief Reset the vector to empty state.
+   void
+   reset()
+   {
+      // drop/deallocate the current data
+      if( owns_handle && v != nullptr ) {
+         hypre_ParVectorDestroy( v );
+         v = nullptr;
+      }
+      else
+         v = nullptr;
+      owns_handle = true;
+
+      localData.reset();
+      ghosts = 0;
+      synchronizer = nullptr;
+      valuesPerElement = 1;
+   }
+
+   void
+   setDistribution( LocalRangeType localRange, IndexType ghosts, IndexType globalSize, const MPI::Comm& communicator )
+   {
+      TNL_ASSERT_LE( localRange.getEnd(), globalSize, "end of the local range is outside of the global range" );
+
+      // drop/deallocate the current data
+      reset();
+
+      // create handle for the vector
+      IndexType partitioning[ 2 ];
+      partitioning[ 0 ] = localRange.getBegin();
+      partitioning[ 1 ] = localRange.getEnd();
+      v = hypre_ParVectorCreate( communicator, globalSize, partitioning );
+
+      // set view data
+      this->localData.setSize( localRange.getSize() + ghosts );
+      hypre_ParVectorOwnsData( v ) = 0;
+      hypre_ParVectorLocalVector( v ) = this->localData;
+      hypre_ParVectorActualLocalSize( v ) = this->localData.getSize();
+
+      this->ghosts = ghosts;
+   }
+
+   //! \brief Set all elements of the vector to \e value.
+   void
+   setValue( RealType value )
+   {
+      if( v != nullptr )
+         hypre_ParVectorSetConstantValues( v, value );
+   }
+
+   // synchronizer stuff
+   void
+   setSynchronizer( std::shared_ptr< SynchronizerType > synchronizer, int valuesPerElement = 1 )
+   {
+      this->synchronizer = std::move( synchronizer );
+      this->valuesPerElement = valuesPerElement;
+   }
+
+   std::shared_ptr< SynchronizerType >
+   getSynchronizer() const
+   {
+      return synchronizer;
+   }
+
+   int
+   getValuesPerElement() const
+   {
+      return valuesPerElement;
+   }
+
+   // Note that this method is not thread-safe - only the thread which created
+   // and "owns" the instance of this object can call this method.
+   void
+   startSynchronization()
+   {
+      if( ghosts == 0 )
+         return;
+      // TODO: assert does not play very nice with automatic synchronizations from operations like
+      //       assignment of scalars
+      // (Maybe we should just drop all automatic syncs? But that's not nice for high-level codes
+      // like linear solvers...)
+      TNL_ASSERT_TRUE( synchronizer, "the synchronizer was not set" );
+
+      typename SynchronizerType::ByteArrayView bytes;
+      bytes.bind( reinterpret_cast< std::uint8_t* >( localData.getData() ), sizeof( ValueType ) * localData.getSize() );
+      synchronizer->synchronizeByteArrayAsync( bytes, sizeof( ValueType ) * valuesPerElement );
+   }
+
+   void
+   waitForSynchronization() const
+   {
+      if( synchronizer && synchronizer->async_op.valid() ) {
+         synchronizer->async_wait_timer.start();
+         synchronizer->async_op.wait();
+         synchronizer->async_wait_timer.stop();
+      }
+   }
+
+protected:
+   hypre_ParVector* v = nullptr;
+   bool owns_handle = true;
+   HypreVector localData;
+   HYPRE_Int ghosts = 0;
+
+   std::shared_ptr< SynchronizerType > synchronizer = nullptr;
+   int valuesPerElement = 1;
+};
+
+}  // namespace Containers
+}  // namespace TNL
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Containers/HypreVector.h b/src/TNL/Containers/HypreVector.h
new file mode 100644
index 0000000000000000000000000000000000000000..5fc6a6ebdc92156e0650b959eeae702ab53de590
--- /dev/null
+++ b/src/TNL/Containers/HypreVector.h
@@ -0,0 +1,265 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <TNL/Hypre.h>
+
+   #include <TNL/Containers/Vector.h>
+
+namespace TNL {
+namespace Containers {
+
+/**
+ * \brief Wrapper for Hypre's sequential vector.
+ *
+ * Links to upstream sources:
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/vector.h
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/vector.c
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface)
+ *
+ * \ingroup Hypre
+ */
+class HypreVector
+{
+public:
+   using RealType = HYPRE_Real;
+   using ValueType = RealType;
+   using DeviceType = HYPRE_Device;
+   using IndexType = HYPRE_Int;
+
+   using VectorType = Containers::Vector< RealType, DeviceType, IndexType >;
+   using ViewType = typename VectorType::ViewType;
+   using ConstViewType = typename VectorType::ConstViewType;
+
+   HypreVector() = default;
+
+   // TODO: behavior should depend on "owns_data" (shallow vs deep copy)
+   HypreVector( const HypreVector& other ) = delete;
+
+   HypreVector( HypreVector&& other ) noexcept : v( other.v ), owns_handle( other.owns_handle )
+   {
+      other.v = nullptr;
+   }
+
+   // TODO should do a deep copy
+   HypreVector&
+   operator=( const HypreVector& other ) = delete;
+
+   HypreVector&
+   operator=( HypreVector&& other ) noexcept
+   {
+      v = other.v;
+      other.v = nullptr;
+      owns_handle = other.owns_handle;
+      return *this;
+   }
+
+   HypreVector( RealType* data, IndexType size )
+   {
+      bind( data, size );
+   }
+
+   HypreVector( ViewType view )
+   {
+      bind( view );
+   }
+
+   /**
+    * \brief Convert Hypre's format to HypreVector
+    *
+    * \param take_ownership indicates if the vector should take ownership of
+    * the handle, i.e. whether to call \e hypre_VectorDestroy when it does
+    * not need it anymore.
+    */
+   explicit HypreVector( hypre_Vector* handle, bool take_ownership = true )
+   {
+      bind( handle, take_ownership );
+   }
+
+   operator const hypre_Vector*() const noexcept
+   {
+      return v;
+   }
+
+   operator hypre_Vector*() noexcept
+   {
+      return v;
+   }
+
+   // HYPRE_Vector is "equivalent" to pointer to hypre_Vector, but requires
+   // ugly C-style cast on the pointer (which is done even in Hypre itself)
+   // https://github.com/hypre-space/hypre/blob/master/src/seq_mv/HYPRE_vector.c
+   operator HYPRE_Vector() const noexcept
+   {
+      return (HYPRE_Vector) v;
+   }
+
+   ~HypreVector()
+   {
+      reset();
+   }
+
+   const RealType*
+   getData() const noexcept
+   {
+      if( v == nullptr )
+         return nullptr;
+      return hypre_VectorData( v );
+   }
+
+   RealType*
+   getData() noexcept
+   {
+      if( v == nullptr )
+         return nullptr;
+      return hypre_VectorData( v );
+   }
+
+   IndexType
+   getSize() const
+   {
+      if( v == nullptr )
+         return 0;
+      return hypre_VectorSize( v );
+   }
+
+   ConstViewType
+   getConstView() const
+   {
+      if( v == nullptr )
+         return {};
+      return { getData(), getSize() };
+   }
+
+   ViewType
+   getView()
+   {
+      if( v == nullptr )
+         return {};
+      return { getData(), getSize() };
+   }
+
+   /**
+    * \brief Drop previously set data (deallocate if the vector was the owner)
+    * and bind to the given data (i.e., the vector does not become the owner).
+    */
+   void
+   bind( RealType* data, IndexType size )
+   {
+      // drop/deallocate the current data
+      reset();
+
+      // create handle for the vector
+      v = hypre_SeqVectorCreate( 0 );
+
+      // set view data
+      hypre_VectorOwnsData( v ) = 0;
+      hypre_VectorData( v ) = data;
+      hypre_VectorSize( v ) = size;
+      // TODO: v->memory_location
+   }
+
+   void
+   bind( ViewType view )
+   {
+      bind( view.getData(), view.getSize() );
+   }
+
+   void
+   bind( VectorType& vector )
+   {
+      bind( vector.getView() );
+   }
+
+   void
+   bind( HypreVector& vector )
+   {
+      bind( vector.getData(), vector.getSize() );
+   }
+
+   /**
+    * \brief Convert Hypre's format to HypreSeqVector
+    *
+    * \param take_ownership indicates if the vector should take ownership of
+    * the handle, i.e. whether to call \e hypre_VectorDestroy when it does
+    * not need it anymore.
+    */
+   void
+   bind( hypre_Vector* handle, bool take_ownership = true )
+   {
+      // drop/deallocate the current data
+      reset();
+
+      // set the handle and ownership flag
+      v = handle;
+      owns_handle = take_ownership;
+   }
+
+   //! \brief Reset the vector to empty state.
+   void
+   reset()
+   {
+      if( owns_handle && v != nullptr ) {
+         hypre_SeqVectorDestroy( v );
+         v = nullptr;
+      }
+      else
+         v = nullptr;
+      owns_handle = true;
+   }
+
+   /**
+    * \brief Set the new vector size.
+    *
+    * - if the vector previously owned data, they are deallocated
+    * - new size is set
+    * - the vector is initialized with \e hypre_SeqVectorInitialize
+    *   (i.e., data are allocated)
+    */
+   void
+   setSize( IndexType size )
+   {
+      hypre_SeqVectorDestroy( v );
+      v = hypre_SeqVectorCreate( size );
+      hypre_SeqVectorInitialize( v );
+   }
+
+   //! \brief Equivalent to \ref setSize.
+   void
+   resize( IndexType size )
+   {
+      setSize( size );
+   }
+
+   //! \brief Equivalent to \ref setSize followed by \ref setValue.
+   void
+   resize( IndexType size, RealType value )
+   {
+      setSize( size );
+      setValue( value );
+   }
+
+   //! \brief Set all elements of the vector to \e value.
+   void
+   setValue( RealType value )
+   {
+      hypre_SeqVectorSetConstantValues( v, value );
+   }
+
+protected:
+   hypre_Vector* v = nullptr;
+   bool owns_handle = true;
+};
+
+}  // namespace Containers
+}  // namespace TNL
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Containers/Partitioner.h b/src/TNL/Containers/Partitioner.h
index e79a6d770572dc56579354bd7c4789332d20d129..3b121b243f1251763dad1cb403ac65da4a3d7d4f 100644
--- a/src/TNL/Containers/Partitioner.h
+++ b/src/TNL/Containers/Partitioner.h
@@ -29,28 +29,22 @@ public:
    static SubrangeType
    splitRange( Index globalSize, const MPI::Comm& communicator )
    {
-      if( communicator != MPI_COMM_NULL ) {
-         const int rank = communicator.rank();
-         const int partitions = communicator.size();
-         const Index begin = TNL::min( globalSize, rank * globalSize / partitions );
-         const Index end = TNL::min( globalSize, ( rank + 1 ) * globalSize / partitions );
-         return SubrangeType( begin, end );
+      if( communicator == MPI_COMM_NULL )
+         return { 0, 0 };
+
+      const int rank = communicator.rank();
+      const int partitions = communicator.size();
+
+      const Index partSize = globalSize / partitions;
+      const int remainder = globalSize % partitions;
+      if( rank < remainder ) {
+         const Index begin = rank * ( partSize + 1 );
+         const Index end = begin + partSize + 1;
+         return { begin, end };
       }
-      else
-         return SubrangeType( 0, 0 );
-   }
-
-   // Gets the owner of given global index.
-   __cuda_callable__
-   static int
-   getOwner( Index i, Index globalSize, int partitions )
-   {
-      int owner = i * partitions / globalSize;
-      if( owner < partitions - 1 && i >= getOffset( globalSize, owner + 1, partitions ) )
-         owner++;
-      TNL_ASSERT_GE( i, getOffset( globalSize, owner, partitions ), "BUG in getOwner" );
-      TNL_ASSERT_LT( i, getOffset( globalSize, owner + 1, partitions ), "BUG in getOwner" );
-      return owner;
+      const Index begin = remainder * ( partSize + 1 ) + ( rank - remainder ) * partSize;
+      const Index end = begin + partSize;
+      return { begin, end };
    }
 
    // Gets the offset of data for given rank.
@@ -58,7 +52,11 @@ public:
    static Index
    getOffset( Index globalSize, int rank, int partitions )
    {
-      return rank * globalSize / partitions;
+      const Index partSize = globalSize / partitions;
+      const int remainder = globalSize % partitions;
+      if( rank < remainder )
+         return rank * ( partSize + 1 );
+      return remainder * ( partSize + 1 ) + ( rank - remainder ) * partSize;
    }
 
    // Gets the size of data assigned to given rank.
@@ -66,9 +64,11 @@ public:
    static Index
    getSizeForRank( Index globalSize, int rank, int partitions )
    {
-      const Index begin = min( globalSize, rank * globalSize / partitions );
-      const Index end = min( globalSize, ( rank + 1 ) * globalSize / partitions );
-      return end - begin;
+      const Index partSize = globalSize / partitions;
+      const int remainder = globalSize % partitions;
+      if( rank < remainder )
+         return partSize + 1;
+      return partSize;
    }
 
    template< typename Device >
diff --git a/src/TNL/Containers/detail/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h
index a97caf6d1b8e0dc62cf6a7a9c09f8d0f090519dc..31f46c47c9d09387e21c612a2e886562b993594f 100644
--- a/src/TNL/Containers/detail/ArrayIO.h
+++ b/src/TNL/Containers/detail/ArrayIO.h
@@ -26,12 +26,11 @@ struct ArrayIO< Value, Index, Allocator, true >
    static std::string
    getSerializationType()
    {
-      return "Containers::Array< " + TNL::getSerializationType< Value >() + ", [any_device], "
-           + TNL::getSerializationType< Index >() + ", [any_allocator] >";
+      return "TNL::Containers::Array< " + TNL::getSerializationType< Value >() + " >";
    }
 
    static void
-   save( File& file, const Value* data, const Index elements )
+   save( File& file, const Value* data, Index elements )
    {
       Index i;
       try {
@@ -46,7 +45,7 @@ struct ArrayIO< Value, Index, Allocator, true >
    }
 
    static void
-   load( File& file, Value* data, const Index elements )
+   load( File& file, Value* data, Index elements )
    {
       Index i = 0;
       try {
@@ -59,6 +58,31 @@ struct ArrayIO< Value, Index, Allocator, true >
                                                         + std::to_string( elements ) + " from the file." );
       }
    }
+
+   static void
+   loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size )
+   {
+      if( std::size_t( offset + size ) > elementsInFile )
+         throw Exceptions::FileDeserializationError(
+            file.getFileName(), "unable to read subrange of array elements: offset + size > elementsInFile" );
+
+      if( size == 0 ) {
+         file.ignore< Value >( elementsInFile );
+         return;
+      }
+
+      try {
+         file.ignore< Value >( offset );
+         load( file, data, size );
+         file.ignore< Value >( elementsInFile - offset - size );
+      }
+      catch( ... ) {
+         throw Exceptions::FileDeserializationError( file.getFileName(),
+                                                     "unable to read array elements in the subrange ["
+                                                        + std::to_string( offset ) + ", " + std::to_string( offset + size )
+                                                        + ") from the file." );
+      }
+   }
 };
 
 template< typename Value, typename Index, typename Allocator >
@@ -67,17 +91,17 @@ struct ArrayIO< Value, Index, Allocator, false >
    static std::string
    getSerializationType()
    {
-      return "Containers::Array< " + TNL::getSerializationType< Value >() + ", [any_device], "
-           + TNL::getSerializationType< Index >() + ", [any_allocator] >";
+      return "TNL::Containers::Array< " + TNL::getSerializationType< Value >() + " >";
    }
 
+   template< typename TargetValue = Value >
    static void
-   save( File& file, const Value* data, const Index elements )
+   save( File& file, const Value* data, Index elements )
    {
       if( elements == 0 )
          return;
       try {
-         file.save< Value, Value, Allocator >( data, elements );
+         file.save< Value, TargetValue, Allocator >( data, elements );
       }
       catch( ... ) {
          throw Exceptions::FileSerializationError(
@@ -86,18 +110,143 @@ struct ArrayIO< Value, Index, Allocator, false >
    }
 
    static void
-   load( File& file, Value* data, const Index elements )
+   save( File& file, const Value* data, Index elements, const std::string& typeInFile )
+   {
+      if( typeInFile == getType< Value >() )
+         save< Value >( file, data, elements );
+      // check fundamental types for type casting
+      else if( typeInFile == getType< std::int8_t >() )
+         save< std::int8_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint8_t >() )
+         save< std::uint8_t >( file, data, elements );
+      else if( typeInFile == getType< std::int16_t >() )
+         save< std::int16_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint16_t >() )
+         save< std::uint16_t >( file, data, elements );
+      else if( typeInFile == getType< std::int32_t >() )
+         save< std::int32_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint32_t >() )
+         save< std::uint32_t >( file, data, elements );
+      else if( typeInFile == getType< std::int64_t >() )
+         save< std::int64_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint64_t >() )
+         save< std::uint64_t >( file, data, elements );
+      else if( typeInFile == getType< float >() )
+         save< float >( file, data, elements );
+      else if( typeInFile == getType< double >() )
+         save< double >( file, data, elements );
+      else
+         throw Exceptions::FileSerializationError(
+            file.getFileName(),
+            "value type " + getType< Value >() + " cannot be type-cast to the requested type " + std::string( typeInFile ) );
+   }
+
+   template< typename SourceValue = Value >
+   static void
+   load( File& file, Value* data, Index elements )
    {
       if( elements == 0 )
          return;
       try {
-         file.load< Value, Value, Allocator >( data, elements );
+         file.load< Value, SourceValue, Allocator >( data, elements );
       }
       catch( ... ) {
          throw Exceptions::FileDeserializationError(
             file.getFileName(), "unable to read " + std::to_string( elements ) + " array elements from the file." );
       }
    }
+
+   static void
+   load( File& file, Value* data, Index elements, const std::string& typeInFile )
+   {
+      if( typeInFile == getType< Value >() )
+         load( file, data, elements );
+      // check fundamental types for type casting
+      else if( typeInFile == getType< std::int8_t >() )
+         load< std::int8_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint8_t >() )
+         load< std::uint8_t >( file, data, elements );
+      else if( typeInFile == getType< std::int16_t >() )
+         load< std::int16_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint16_t >() )
+         load< std::uint16_t >( file, data, elements );
+      else if( typeInFile == getType< std::int32_t >() )
+         load< std::int32_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint32_t >() )
+         load< std::uint32_t >( file, data, elements );
+      else if( typeInFile == getType< std::int64_t >() )
+         load< std::int64_t >( file, data, elements );
+      else if( typeInFile == getType< std::uint64_t >() )
+         load< std::uint64_t >( file, data, elements );
+      else if( typeInFile == getType< float >() )
+         load< float >( file, data, elements );
+      else if( typeInFile == getType< double >() )
+         load< double >( file, data, elements );
+      else
+         throw Exceptions::FileDeserializationError( file.getFileName(),
+                                                     "value type " + std::string( typeInFile )
+                                                        + " cannot be type-cast to the requested type " + getType< Value >() );
+   }
+
+   template< typename SourceValue = Value >
+   static void
+   loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size )
+   {
+      if( std::size_t( offset + size ) > elementsInFile )
+         throw Exceptions::FileDeserializationError(
+            file.getFileName(),
+            "unable to read subrange of array elements: offset + size > elementsInFile: " + std::to_string( offset ) + " + "
+               + std::to_string( size ) + " > " + std::to_string( elementsInFile ) );
+
+      if( size == 0 ) {
+         file.ignore< SourceValue >( elementsInFile );
+         return;
+      }
+
+      try {
+         file.ignore< SourceValue >( offset );
+         load< SourceValue >( file, data, size );
+         file.ignore< SourceValue >( elementsInFile - offset - size );
+      }
+      catch( ... ) {
+         throw Exceptions::FileDeserializationError( file.getFileName(),
+                                                     "unable to read array elements in the subrange ["
+                                                        + std::to_string( offset ) + ", " + std::to_string( offset + size )
+                                                        + ") from the file." );
+      }
+   }
+
+   static void
+   loadSubrange( File& file, std::size_t elementsInFile, Index offset, Value* data, Index size, const std::string& typeInFile )
+   {
+      if( typeInFile == getType< Value >() )
+         loadSubrange( file, elementsInFile, offset, data, size );
+      // check fundamental types for type casting
+      else if( typeInFile == getType< std::int8_t >() )
+         loadSubrange< std::int8_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::uint8_t >() )
+         loadSubrange< std::uint8_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::int16_t >() )
+         loadSubrange< std::int16_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::uint16_t >() )
+         loadSubrange< std::uint16_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::int32_t >() )
+         loadSubrange< std::int32_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::uint32_t >() )
+         loadSubrange< std::uint32_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::int64_t >() )
+         loadSubrange< std::int64_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< std::uint64_t >() )
+         loadSubrange< std::uint64_t >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< float >() )
+         loadSubrange< float >( file, elementsInFile, offset, data, size );
+      else if( typeInFile == getType< double >() )
+         loadSubrange< double >( file, elementsInFile, offset, data, size );
+      else
+         throw Exceptions::FileDeserializationError( file.getFileName(),
+                                                     "value type " + std::string( typeInFile )
+                                                        + " cannot be type-cast to the requested type " + getType< Value >() );
+   }
 };
 
 }  // namespace detail
diff --git a/src/TNL/File.h b/src/TNL/File.h
index 92dfd58e4edaa89b63a368e4989b48f62997e37b..7449a15921f88774bdf350f8e28518cbfe8a7675 100644
--- a/src/TNL/File.h
+++ b/src/TNL/File.h
@@ -140,6 +140,18 @@ public:
    void
    save( const Type* buffer, std::streamsize elements = 1 );
 
+   /**
+    * \brief Extracts and discards characters from the file.
+    *
+    * Throws \ref std::ios_base::failure on failure.
+    *
+    * \tparam SourceType type of data stored on the file,
+    * \param elements number of elements to be read and ignored.
+    */
+   template< typename SourceType >
+   void
+   ignore( std::streamsize elements = 1 );
+
 protected:
    // implementation for all allocators which allocate data accessible from host
    template< typename Type,
diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp
index 0d10985f2a8ef7803be919d6f79c7b20bd40c179..28dc560cf9d739e2877e5dbe55aec2a8e52a5e31 100644
--- a/src/TNL/File.hpp
+++ b/src/TNL/File.hpp
@@ -103,7 +103,6 @@ File::load_impl( Type* buffer, std::streamsize elements )
          file.read( reinterpret_cast< char* >( cast_buffer.get() ), sizeof( SourceType ) * transfer );
          for( std::streamsize i = 0; i < transfer; i++ )
             buffer[ readElements++ ] = static_cast< Type >( cast_buffer[ i ] );
-         readElements += transfer;
       }
    }
 }
@@ -184,7 +183,6 @@ File::save_impl( const Type* buffer, std::streamsize elements )
          for( std::streamsize i = 0; i < transfer; i++ )
             cast_buffer[ i ] = static_cast< TargetType >( buffer[ writtenElements++ ] );
          file.write( reinterpret_cast< char* >( cast_buffer.get() ), sizeof( TargetType ) * transfer );
-         writtenElements += transfer;
       }
    }
 }
@@ -234,6 +232,15 @@ File::save_impl( const Type* buffer, std::streamsize elements )
 #endif
 }
 
+template< typename SourceType >
+void
+File::ignore( std::streamsize elements )
+{
+   // use seekg instead of ignore for efficiency
+   // https://stackoverflow.com/a/31246560
+   file.seekg( sizeof( SourceType ) * elements, std::ios_base::cur );
+}
+
 inline bool
 fileExists( const String& fileName )
 {
diff --git a/src/TNL/Hypre.h b/src/TNL/Hypre.h
new file mode 100644
index 0000000000000000000000000000000000000000..591b82fcc168a2788cd138e4bea9681cff99eb54
--- /dev/null
+++ b/src/TNL/Hypre.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <mpi.h>
+
+   // Hypre header files
+   #include <seq_mv.h>
+   #include <_hypre_parcsr_mv.h>
+   #include <_hypre_parcsr_ls.h>
+
+   #ifdef HYPRE_MIXEDINT
+      #error "TNL does not work with HYPRE's mixed-int support (i.e. when HYPRE_Int and HYPRE_BigInt are different types)"
+   #endif
+   #ifdef HYPRE_COMPLEX
+      #error "TNL does not work with HYPRE's complex numbers support"
+   #endif
+
+   #if defined( HYPRE_USING_GPU ) && ! ( defined( HYPRE_USING_CUDA ) || defined( HYPRE_USING_HIP ) )
+      #error "Unsupported GPU build of HYPRE! Only CUDA and HIP builds are supported."
+   #endif
+   #if defined( HYPRE_USING_CUDA ) && ! defined( HAVE_CUDA )
+      #error "HAVE_CUDA is required when HYPRE is built with CUDA!"
+   #endif
+   #if defined( HYPRE_USING_HIP ) && ! defined( HAVE_HIP )
+      #error "HAVE_HIP is required when HYPRE is built with HIP!"
+   #endif
+
+namespace TNL {
+
+/**
+ * \defgroup Hypre  Wrappers for the Hypre library
+ *
+ * This group includes various wrapper classes for data structures and
+ * algorithms implemented in the [Hypre library][hypre]. See the
+ * [example][example] for how these wrappers can be used.
+ *
+ * [hypre]: https://github.com/hypre-space/hypre
+ * [example]: https://mmg-gitlab.fjfi.cvut.cz/gitlab/tnl/tnl-dev/-/blob/develop/src/Examples/Hypre/tnl-hypre-ex5.cpp
+ *
+ * @{
+ */
+
+/**
+ * \brief A simple RAII wrapper for Hypre's initialization and finalization.
+ *
+ * When the object is constructed, it calls \e HYPRE_Init() and sets some
+ * GPU-relevant options. The \e HYPRE_Finalize() function is called
+ * automatically from the object's destructor.
+ */
+struct Hypre
+{
+   //! \brief Constructor initializes Hypre by calling \e HYPRE_Init() and set default options.
+   Hypre()
+   {
+      HYPRE_Init();
+
+      setDefaultOptions();
+   }
+
+   //! \brief Sets the default Hypre global options (mostly GPU-relevant).
+   void
+   setDefaultOptions()
+   {
+      // Global Hypre options, see
+      // https://hypre.readthedocs.io/en/latest/solvers-boomeramg.html#gpu-supported-options
+
+   #ifdef HYPRE_USING_CUDA
+      // Use hypre's SpGEMM instead of cuSPARSE for performance reasons
+      HYPRE_SetSpGemmUseCusparse( 0 );
+   #elif defined( HYPRE_USING_HIP )
+      // Use rocSPARSE instead of hypre's SpGEMM for performance reasons (default)
+      HYPRE_SetSpGemmUseCusparse( 1 );
+   #endif
+
+      // The following options are Hypre's defaults as of version 2.24
+
+      // Allocate Hypre objects in GPU memory (default)
+      // HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE);
+
+      // Where to execute when using UVM (default)
+      // HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE);
+
+      // Use GPU-based random number generator (default)
+      // HYPRE_SetUseGpuRand(1);
+   }
+
+   //! \brief Destructor that finalizes Hypre when the object goes out of scope.
+   ~Hypre()
+   {
+      HYPRE_Finalize();
+   }
+};
+
+}  // namespace TNL
+
+// clang-format off
+   #ifdef HYPRE_USING_CUDA
+      #include <TNL/Devices/Cuda.h>
+      namespace TNL {
+         using HYPRE_Device = Devices::Cuda;
+      }
+   #else
+      #include <TNL/Devices/Host.h>
+      namespace TNL {
+         /**
+          * \brief The \ref TNL::Devices "device" compatible with Hypre's data
+          * structures.
+          *
+          * The type depends on how the Hypre library was configured. By
+          * default, it is \ref Devices::Host. When using Hypre built with CUDA
+          * support, it is \ref Devices::Cuda.
+          */
+         using HYPRE_Device = Devices::Host;
+      }
+   #endif
+// clang-format on
+
+// this is a Doxygen end-group marker
+//! @}
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Matrices/HypreCSRMatrix.h b/src/TNL/Matrices/HypreCSRMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a9590ba453e259b0570819b664437acab850dea
--- /dev/null
+++ b/src/TNL/Matrices/HypreCSRMatrix.h
@@ -0,0 +1,410 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <TNL/Hypre.h>
+
+   #include <TNL/Matrices/SparseMatrix.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief Wrapper for Hypre's sequential CSR matrix.
+ *
+ * Links to upstream sources:
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.h
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.c
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface)
+ *
+ * \ingroup Hypre
+ */
+class HypreCSRMatrix
+{
+public:
+   using RealType = HYPRE_Real;
+   using ValueType = RealType;
+   using DeviceType = HYPRE_Device;
+   using IndexType = HYPRE_Int;
+
+   using MatrixType = SparseMatrix< RealType, DeviceType, IndexType, GeneralMatrix, Algorithms::Segments::CSRDefault >;
+   using ViewType = typename MatrixType::ViewType;
+   using ConstViewType = typename MatrixType::ConstViewType;
+
+   using ValuesViewType = Containers::VectorView< RealType, DeviceType, IndexType >;
+   using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+   using ColumnIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using ColumnIndexesViewType = typename ColumnIndexesVectorType::ViewType;
+   using ConstColumnIndexesViewType = typename ColumnIndexesVectorType::ConstViewType;
+   using SegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, IndexType >;
+   using ConstSegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, std::add_const_t< IndexType > >;
+
+   HypreCSRMatrix() = default;
+
+   // TODO: behavior should depend on "owns_data" (shallow vs deep copy)
+   HypreCSRMatrix( const HypreCSRMatrix& other ) = delete;
+
+   HypreCSRMatrix( HypreCSRMatrix&& other ) noexcept : m( other.m ), owns_handle( other.owns_handle )
+   {
+      other.m = nullptr;
+   }
+
+   // TODO should do a deep copy
+   HypreCSRMatrix&
+   operator=( const HypreCSRMatrix& other ) = delete;
+
+   HypreCSRMatrix&
+   operator=( HypreCSRMatrix&& other ) noexcept
+   {
+      m = other.m;
+      other.m = nullptr;
+      owns_handle = other.owns_handle;
+      return *this;
+   }
+
+   HypreCSRMatrix( IndexType rows,
+                   IndexType columns,
+                   ValuesViewType values,
+                   ColumnIndexesViewType columnIndexes,
+                   ColumnIndexesViewType rowOffsets )
+   {
+      bind( rows, columns, std::move( values ), std::move( columnIndexes ), std::move( rowOffsets ) );
+   }
+
+   HypreCSRMatrix( ViewType view )
+   {
+      bind( std::move( view ) );
+   }
+
+   /**
+    * \brief Convert Hypre's format to \e HypreCSRMatrix
+    *
+    * \param take_ownership indicates if the matrix should take ownership of
+    * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does
+    * not need it anymore.
+    */
+   explicit HypreCSRMatrix( hypre_CSRMatrix* handle, bool take_ownership = true )
+   {
+      bind( handle, take_ownership );
+   }
+
+   operator const hypre_CSRMatrix*() const noexcept
+   {
+      return m;
+   }
+
+   operator hypre_CSRMatrix*() noexcept
+   {
+      return m;
+   }
+
+   // HYPRE_CSRMatrix is "equivalent" to pointer to hypre_CSRMatrix, but requires
+   // ugly C-style cast on the pointer (which is done even in Hypre itself)
+   // https://github.com/hypre-space/hypre/blob/master/src/seq_mv/HYPRE_csr_matrix.c
+   operator HYPRE_CSRMatrix() noexcept
+   {
+      return (HYPRE_CSRMatrix) m;
+   }
+
+   ~HypreCSRMatrix()
+   {
+      reset();
+   }
+
+   IndexType
+   getRows() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_CSRMatrixNumRows( m );
+   }
+
+   IndexType
+   getColumns() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_CSRMatrixNumCols( m );
+   }
+
+   IndexType
+   getNonzeroElementsCount() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_CSRMatrixNumNonzeros( m );
+   }
+
+   ConstValuesViewType
+   getValues() const
+   {
+      if( m == nullptr )
+         return {};
+      return { hypre_CSRMatrixData( m ), hypre_CSRMatrixNumNonzeros( m ) };
+   }
+
+   ValuesViewType
+   getValues()
+   {
+      if( m == nullptr )
+         return {};
+      return { hypre_CSRMatrixData( m ), hypre_CSRMatrixNumNonzeros( m ) };
+   }
+
+   ConstColumnIndexesViewType
+   getColumnIndexes() const
+   {
+      if( m == nullptr )
+         return {};
+      static_assert( std::is_same< HYPRE_Int, HYPRE_BigInt >::value,
+                     "The J array cannot be accessed via this method when HYPRE_Int and HYPRE_BigInt are different types." );
+      if( hypre_CSRMatrixBigJ( m ) != nullptr )
+         return { hypre_CSRMatrixBigJ( m ), hypre_CSRMatrixNumNonzeros( m ) };
+      return { hypre_CSRMatrixJ( m ), hypre_CSRMatrixNumNonzeros( m ) };
+   }
+
+   ColumnIndexesViewType
+   getColumnIndexes()
+   {
+      if( m == nullptr )
+         return {};
+      static_assert( std::is_same< HYPRE_Int, HYPRE_BigInt >::value,
+                     "The J array cannot be accessed via this method when HYPRE_Int and HYPRE_BigInt are different types." );
+      if( hypre_CSRMatrixBigJ( m ) != nullptr )
+         return { hypre_CSRMatrixBigJ( m ), hypre_CSRMatrixNumNonzeros( m ) };
+      return { hypre_CSRMatrixJ( m ), hypre_CSRMatrixNumNonzeros( m ) };
+   }
+
+   ConstColumnIndexesViewType
+   getRowOffsets() const
+   {
+      if( m == nullptr )
+         return {};
+      if( hypre_CSRMatrixI( m ) == nullptr )
+         return {};
+      return { hypre_CSRMatrixI( m ), hypre_CSRMatrixNumRows( m ) + 1 };
+   }
+
+   ColumnIndexesViewType
+   getRowOffsets()
+   {
+      if( m == nullptr )
+         return {};
+      if( hypre_CSRMatrixI( m ) == nullptr )
+         return {};
+      return { hypre_CSRMatrixI( m ), hypre_CSRMatrixNumRows( m ) + 1 };
+   }
+
+   ConstSegmentsViewType
+   getSegments() const
+   {
+      if( m == nullptr )
+         return {};
+      return { getRowOffsets(), typename ConstSegmentsViewType::KernelType{} };
+   }
+
+   SegmentsViewType
+   getSegments()
+   {
+      if( m == nullptr )
+         return {};
+      return { getRowOffsets(), typename SegmentsViewType::KernelType{} };
+   }
+
+   ConstViewType
+   getConstView() const
+   {
+      // FIXME: remove const_cast
+      return { getRows(), getColumns(), getValues(), getColumnIndexes(), const_cast< HypreCSRMatrix* >( this )->getSegments() };
+   }
+
+   ViewType
+   getView()
+   {
+      return { getRows(), getColumns(), getValues(), getColumnIndexes(), getSegments() };
+   }
+
+   /**
+    * \brief Drop previously set data (deallocate if the matrix was the owner)
+    * and bind to the given data (i.e., the matrix does not become the owner).
+    */
+   void
+   bind( IndexType rows,
+         IndexType columns,
+         ValuesViewType values,
+         ColumnIndexesViewType columnIndexes,
+         ColumnIndexesViewType rowOffsets )
+   {
+      TNL_ASSERT_EQ( rowOffsets.getSize(), rows + 1, "wrong size of rowOffsets" );
+      TNL_ASSERT_EQ( values.getSize(), rowOffsets.getElement( rows ), "wrong size of values" );
+      TNL_ASSERT_EQ( columnIndexes.getSize(), rowOffsets.getElement( rows ), "wrong size of columnIndexes" );
+
+      // drop/deallocate the current data
+      reset();
+
+      // create handle for the matrix
+      m = hypre_CSRMatrixCreate( rows, columns, rowOffsets.getElement( rows ) );
+
+      // set view data
+      hypre_CSRMatrixOwnsData( m ) = 0;
+      hypre_CSRMatrixData( m ) = values.getData();
+      hypre_CSRMatrixJ( m ) = columnIndexes.getData();
+      hypre_CSRMatrixI( m ) = rowOffsets.getData();
+      // TODO: v->memory_location
+   }
+
+   void
+   bind( ViewType view )
+   {
+      bind( view.getRows(), view.getColumns(), view.getValues(), view.getColumnIndexes(), view.getSegments().getOffsets() );
+   }
+
+   void
+   bind( MatrixType& matrix )
+   {
+      bind( matrix.getView() );
+   }
+
+   void
+   bind( HypreCSRMatrix& matrix )
+   {
+      bind( matrix.getRows(), matrix.getColumns(), matrix.getValues(), matrix.getColumnIndexes(), matrix.getRowOffsets() );
+   }
+
+   /**
+    * \brief Convert Hypre's format to \e HypreCSRMatrix
+    *
+    * \param take_ownership indicates if the matrix should take ownership of
+    * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does
+    * not need it anymore.
+    */
+   void
+   bind( hypre_CSRMatrix* handle, bool take_ownership = true )
+   {
+      // drop/deallocate the current data
+      reset();
+
+      // set the handle and ownership flag
+      m = handle;
+      owns_handle = take_ownership;
+   }
+
+   //! \brief Reset the matrix to empty state.
+   void
+   reset()
+   {
+      if( owns_handle && m != nullptr ) {
+         // FIXME: workaround for https://github.com/hypre-space/hypre/issues/621
+         if( ! hypre_CSRMatrixOwnsData( m ) )
+            // prevent deallocation of the "I" array when the handle does not own it
+            hypre_CSRMatrixI( m ) = nullptr;
+
+         hypre_CSRMatrixDestroy( m );
+         m = nullptr;
+      }
+      else
+         m = nullptr;
+      owns_handle = true;
+   }
+
+   /**
+    * \brief Set the new matrix dimensions.
+    *
+    * - if the matrix previously owned data, they are deallocated
+    * - new size is set
+    * - the matrix is initialized with \e hypre_CSRMatrixInitialize
+    *   (i.e., data are allocated)
+    */
+   void
+   setDimensions( IndexType rows, IndexType cols )
+   {
+      reset();
+      m = hypre_CSRMatrixCreate( rows, cols, 0 );
+      hypre_CSRMatrixInitialize( m );
+   }
+
+   template< typename RowsCapacitiesVector >
+   void
+   setRowCapacities( const RowsCapacitiesVector& rowCapacities )
+   {
+      TNL_ASSERT_EQ(
+         rowCapacities.getSize(), this->getRows(), "Number of matrix rows does match the rowCapacities vector size." );
+
+      const IndexType nonzeros = TNL::sum( rowCapacities );
+      hypre_CSRMatrixResize( m, getRows(), getColumns(), nonzeros );
+
+      // initialize row pointers
+      auto rowOffsets = getRowOffsets();
+      TNL_ASSERT_EQ(
+         rowOffsets.getSize(), rowCapacities.getSize() + 1, "The size of the rowOffsets vector does not match rowCapacities" );
+      // GOTCHA: when rowCapacities.getSize() == 0, getView returns a full view with size == 1
+      if( rowCapacities.getSize() > 0 ) {
+         auto view = rowOffsets.getView( 0, rowCapacities.getSize() );
+         view = rowCapacities;
+      }
+      rowOffsets.setElement( rowCapacities.getSize(), 0 );
+      Algorithms::inplaceExclusiveScan( rowOffsets );
+
+      // reset column indices with the padding index
+      getColumnIndexes().setValue( -1 );
+   }
+
+   /**
+    * \brief Reorders the column and data arrays of a square matrix, such that
+    * the first entry in each row is the diagonal one.
+    *
+    * Note that the \e hypre_CSRMatrixReorder function only swaps the diagonal
+    * and first entries in the row, but this function also shifts the
+    * subdiagonal entries to ensure that the column indices (except for the
+    * diagonal one) remain in the original order.
+    */
+   void
+   reorderDiagonalEntries()
+   {
+      // operation does not make sense for non-square matrices
+      if( getRows() != getColumns() )
+         return;
+
+      getView().forAllRows(
+         [] __cuda_callable__( typename ViewType::RowView & row ) mutable
+         {
+            const IndexType j_diag = row.getRowIndex();
+            IndexType c_diag = 0;
+            RealType v_diag;
+
+            // find the diagonal entry
+            for( IndexType c = 0; c < row.getSize(); c++ )
+               if( row.getColumnIndex( c ) == j_diag ) {
+                  c_diag = c;
+                  v_diag = row.getValue( c );
+                  break;
+               }
+
+            if( c_diag > 0 ) {
+               // shift the subdiagonal elements
+               for( IndexType c = c_diag; c > 0; c-- )
+                  row.setElement( c, row.getColumnIndex( c - 1 ), row.getValue( c - 1 ) );
+
+               // write the diagonal entry to the first position
+               row.setElement( 0, j_diag, v_diag );
+            }
+         } );
+   }
+
+protected:
+   hypre_CSRMatrix* m = nullptr;
+   bool owns_handle = true;
+};
+
+}  // namespace Matrices
+}  // namespace TNL
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Matrices/HypreParCSRMatrix.h b/src/TNL/Matrices/HypreParCSRMatrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..b409e8d7c5e6ab2b58d8cb4edef527aada8edfdc
--- /dev/null
+++ b/src/TNL/Matrices/HypreParCSRMatrix.h
@@ -0,0 +1,410 @@
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <memory>  // std::unique_ptr
+
+   #include <TNL/MPI/Comm.h>
+   #include <TNL/MPI/Wrappers.h>
+   #include <TNL/Containers/Subrange.h>
+   #include <TNL/Containers/HypreVector.h>
+   #include <TNL/Matrices/HypreCSRMatrix.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief Wrapper for Hypre's sequential CSR matrix.
+ *
+ * Links to upstream sources:
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.h
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/csr_matrix.c
+ * - https://github.com/hypre-space/hypre/blob/master/src/seq_mv/seq_mv.h (catch-all interface)
+ *
+ * \ingroup Hypre
+ */
+class HypreParCSRMatrix
+{
+public:
+   using RealType = HYPRE_Real;
+   using ValueType = RealType;
+   using DeviceType = HYPRE_Device;
+   using IndexType = HYPRE_Int;
+   using LocalRangeType = Containers::Subrange< IndexType >;
+
+   using MatrixType = HypreCSRMatrix;
+
+   using ValuesViewType = Containers::VectorView< RealType, DeviceType, IndexType >;
+   using ConstValuesViewType = typename ValuesViewType::ConstViewType;
+   using ColumnIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using ColumnIndexesViewType = typename ColumnIndexesVectorType::ViewType;
+   using ConstColumnIndexesViewType = typename ColumnIndexesVectorType::ConstViewType;
+   using SegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, IndexType >;
+   using ConstSegmentsViewType = Algorithms::Segments::CSRViewDefault< DeviceType, std::add_const_t< IndexType > >;
+
+   HypreParCSRMatrix() = default;
+
+   // TODO: behavior should depend on "owns_data" (shallow vs deep copy)
+   HypreParCSRMatrix( const HypreParCSRMatrix& other ) = delete;
+
+   HypreParCSRMatrix( HypreParCSRMatrix&& other ) noexcept : m( other.m )
+   {
+      other.m = nullptr;
+   }
+
+   // TODO should do a deep copy
+   HypreParCSRMatrix&
+   operator=( const HypreParCSRMatrix& other ) = delete;
+
+   HypreParCSRMatrix&
+   operator=( HypreParCSRMatrix&& other ) noexcept
+   {
+      m = other.m;
+      other.m = nullptr;
+      return *this;
+   }
+
+   /**
+    * \brief Convert Hypre's format to \e HypreParCSRMatrix
+    *
+    * \param take_ownership indicates if the matrix should take ownership of
+    * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does
+    * not need it anymore.
+    */
+   explicit HypreParCSRMatrix( hypre_ParCSRMatrix* handle, bool take_ownership = true )
+   {
+      bind( handle, take_ownership );
+   }
+
+   /**
+    * \brief Wrap a global \e hypre_CSRMatrix into \e HypreParCSRMatrix
+    *
+    * Each rank will get its own independent \e HypreParCSRMatrix with the
+    * `MPI_COMM_SELF` communicator. The data is not copied, but
+    * \e HypreParCSRMatrix keeps a non-owning reference.
+    */
+   static HypreParCSRMatrix
+   wrapCSRMatrix( hypre_CSRMatrix* matrix )
+   {
+      const IndexType global_num_rows = hypre_CSRMatrixNumRows( matrix );
+      const IndexType global_num_cols = hypre_CSRMatrixNumCols( matrix );
+      IndexType row_starts[ 2 ];
+      row_starts[ 0 ] = 0;
+      row_starts[ 1 ] = global_num_rows;
+      IndexType col_starts[ 2 ];
+      col_starts[ 0 ] = 0;
+      col_starts[ 1 ] = global_num_cols;
+
+      // create the handle
+      hypre_ParCSRMatrix* A =
+         hypre_ParCSRMatrixCreate( MPI_COMM_SELF, global_num_rows, global_num_cols, row_starts, col_starts, 0, 0, 0 );
+
+      // bind the diag block
+      hypre_CSRMatrixDestroy( hypre_ParCSRMatrixDiag( A ) );
+      hypre_ParCSRMatrixDiag( A ) = matrix;
+
+      // make sure that the first entry in each row of a square diagonal block is the diagonal element
+      HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false );
+      diag_view.reorderDiagonalEntries();
+
+      // set additional attributes
+      hypre_ParCSRMatrixNumNonzeros( A ) = hypre_CSRMatrixNumNonzeros( matrix );
+
+      // initialize auxiliary substructures
+      hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) );
+      hypre_MatvecCommPkgCreate( A );
+
+      // set the diag owner flag
+      HypreParCSRMatrix result( A );
+      result.owns_diag = false;
+      return result;
+   }
+
+   /**
+    * \brief Constructs a \e ParCSRMatrix distributed across the processors in
+    * \e communicator from a \e CSRMatrix on rank 0.
+    *
+    * \param global_row_starts Array of `nproc + 1` elements, where `nproc` is
+    *                          the number of ranks in the \e communicator.
+    * \param global_col_starts Array of `nproc + 1` elements, where `nproc` is
+    *                          the number of ranks in the \e communicator.
+    */
+   static HypreParCSRMatrix
+   fromMasterRank( MPI_Comm communicator, IndexType* global_row_starts, IndexType* global_col_starts, hypre_CSRMatrix* matrix )
+   {
+      TNL_ASSERT_TRUE( global_row_starts, "invalid input" );
+      TNL_ASSERT_TRUE( global_col_starts, "invalid input" );
+      TNL_ASSERT_TRUE( matrix, "invalid input" );
+
+      // NOTE: this call creates a matrix on host even when device support is
+      // enabled in Hypre
+      hypre_ParCSRMatrix* A = hypre_CSRMatrixToParCSRMatrix( communicator, matrix, global_row_starts, global_col_starts );
+
+      // make sure that the first entry in each row of a square diagonal block is the diagonal element
+      HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false );
+      diag_view.reorderDiagonalEntries();
+
+      // initialize auxiliary substructures
+      hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) );
+      hypre_ParCSRMatrixSetNumNonzeros( A );
+      hypre_MatvecCommPkgCreate( A );
+
+      return HypreParCSRMatrix( A );
+   }
+
+   /**
+    * \brief Constructs a \e ParCSRMatrix distributed across the processors in
+    * \e communicator from a \e CSRMatrix on rank 0.
+    *
+    * \param x The values of the vector are unused, but its distribution is
+    *          used for the distribution of the matrix **columns**.
+    * \param b The values of the vector are unused, but its distribution is
+    *          used for the distribution of the matrix **rows**.
+    */
+   static HypreParCSRMatrix
+   fromMasterRank( hypre_CSRMatrix* matrix, hypre_ParVector* x, hypre_ParVector* b )
+   {
+      const MPI_Comm communicator = hypre_ParVectorComm( x );
+      if( communicator == MPI_COMM_NULL )
+         return {};
+
+      // construct global partitioning info on rank 0
+      // (the result on other ranks is not used)
+      const int nproc = MPI::GetSize( communicator );
+      std::unique_ptr< IndexType[] > sendbuf{ new IndexType[ nproc ] };
+
+      std::unique_ptr< IndexType[] > global_row_starts{ new IndexType[ nproc + 1 ] };
+      for( int i = 0; i < nproc; i++ )
+         sendbuf[ i ] = hypre_ParVectorFirstIndex( b );
+      MPI::Alltoall( sendbuf.get(), 1, global_row_starts.get(), 1, communicator );
+      global_row_starts[ nproc ] = hypre_ParVectorGlobalSize( b );
+
+      // if the matrix is square, global_col_starts should be equal to global_row_starts
+      // (otherwise Hypre puts everything into the offd block which would violate the
+      // AssumedPartition premise)
+      if( hypre_CSRMatrixNumRows( matrix ) == hypre_CSRMatrixNumCols( matrix ) ) {
+         return fromMasterRank( communicator, global_row_starts.get(), global_row_starts.get(), matrix );
+      }
+
+      // NOTE: this was not tested...
+      std::unique_ptr< IndexType[] > global_col_starts{ new IndexType[ nproc + 1 ] };
+      for( int i = 0; i < nproc; i++ )
+         sendbuf[ i ] = hypre_ParVectorFirstIndex( x );
+      MPI::Alltoall( sendbuf.get(), 1, global_col_starts.get(), 1, communicator );
+      global_col_starts[ nproc ] = hypre_ParVectorGlobalSize( x );
+
+      return fromMasterRank( communicator, global_row_starts.get(), global_col_starts.get(), matrix );
+   }
+
+   /**
+    * \brief Constructs a \e ParCSRMatrix from local blocks distributed across
+    * the processors in \e communicator.
+    *
+    * \param global_num_rows Global number of rows of the distributed matrix.
+    * \param global_num_cols Global number of columns of the distributed matrix.
+    * \param local_row_range The range `[begin, end)` of rows owned by the
+    *                        calling rank.
+    * \param local_col_range The range `[begin, end)` of columns owned by the
+    *                        calling rank. For square matrices it should be
+    *                        equal to \e local_row_range.
+    * \param matrix The local matrix block owned by the calling rank. The
+    *        number of rows must match the size of \e local_row_range and the
+    *        column indices must span the whole `[0, global_cols)` range.
+    */
+   static HypreParCSRMatrix
+   fromLocalBlocks( MPI_Comm communicator,
+                    IndexType global_num_rows,
+                    IndexType global_num_cols,
+                    LocalRangeType local_row_range,
+                    LocalRangeType local_col_range,
+                    hypre_CSRMatrix* local_A )
+   {
+      TNL_ASSERT_TRUE( local_A, "invalid input" );
+
+      IndexType row_starts[ 2 ];
+      row_starts[ 0 ] = local_row_range.getBegin();
+      row_starts[ 1 ] = local_row_range.getEnd();
+      IndexType col_starts[ 2 ];
+      col_starts[ 0 ] = local_col_range.getBegin();
+      col_starts[ 1 ] = local_col_range.getEnd();
+
+      hypre_ParCSRMatrix* A =
+         hypre_ParCSRMatrixCreate( communicator, global_num_rows, global_num_cols, row_starts, col_starts, 0, 0, 0 );
+
+      const IndexType first_col_diag = hypre_ParCSRMatrixFirstColDiag( A );
+      const IndexType last_col_diag = hypre_ParCSRMatrixLastColDiag( A );
+
+      // Hypre function which splits local_A into the diagonal and off-diagonal blocks
+      GenerateDiagAndOffd( local_A, A, first_col_diag, last_col_diag );
+
+      // make sure that the first entry in each row of a square diagonal block is the diagonal element
+      HypreCSRMatrix diag_view( hypre_ParCSRMatrixDiag( A ), false );
+      diag_view.reorderDiagonalEntries();
+
+      // initialize auxiliary substructures
+      hypre_CSRMatrixSetRownnz( hypre_ParCSRMatrixDiag( A ) );
+      hypre_ParCSRMatrixSetNumNonzeros( A );
+      hypre_MatvecCommPkgCreate( A );
+
+      return HypreParCSRMatrix( A );
+   }
+
+   operator const hypre_ParCSRMatrix*() const noexcept
+   {
+      return m;
+   }
+
+   operator hypre_ParCSRMatrix*() noexcept
+   {
+      return m;
+   }
+
+   // HYPRE_ParCSRMatrix is "equivalent" to pointer to hypre_ParCSRMatrix, but requires
+   // ugly C-style cast on the pointer (which is done even in Hypre itself)
+   // https://github.com/hypre-space/hypre/blob/master/src/parcsr_mv/HYPRE_parcsr_matrix.c
+   operator HYPRE_ParCSRMatrix() const noexcept
+   {
+      return (HYPRE_ParCSRMatrix) m;
+   }
+
+   ~HypreParCSRMatrix()
+   {
+      reset();
+   }
+
+   LocalRangeType
+   getLocalRowRange() const
+   {
+      if( m == nullptr )
+         return {};
+      return { hypre_ParCSRMatrixRowStarts( m )[ 0 ], hypre_ParCSRMatrixRowStarts( m )[ 1 ] };
+   }
+
+   LocalRangeType
+   getLocalColumnRange() const
+   {
+      if( m == nullptr )
+         return {};
+      return { hypre_ParCSRMatrixColStarts( m )[ 0 ], hypre_ParCSRMatrixColStarts( m )[ 1 ] };
+   }
+
+   IndexType
+   getRows() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_ParCSRMatrixGlobalNumRows( m );
+   }
+
+   IndexType
+   getColumns() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_ParCSRMatrixGlobalNumCols( m );
+   }
+
+   IndexType
+   getNonzeroElementsCount() const
+   {
+      if( m == nullptr )
+         return 0;
+      return hypre_ParCSRMatrixNumNonzeros( m );
+   }
+
+   MPI_Comm
+   getCommunicator() const
+   {
+      if( m == nullptr )
+         return MPI_COMM_NULL;
+      return hypre_ParCSRMatrixComm( m );
+   }
+
+   MatrixType
+   getDiagonalBlock()
+   {
+      if( m == nullptr )
+         return {};
+      return HypreCSRMatrix( hypre_ParCSRMatrixDiag( m ), false );
+   }
+
+   MatrixType
+   getOffdiagonalBlock()
+   {
+      if( m == nullptr )
+         return {};
+      return HypreCSRMatrix( hypre_ParCSRMatrixOffd( m ), false );
+   }
+
+   Containers::VectorView< HYPRE_Int, HYPRE_Device, HYPRE_Int >
+   getOffdiagonalColumnsMapping()
+   {
+      if( m == nullptr )
+         return {};
+      HYPRE_Int* col_map_offd = hypre_ParCSRMatrixColMapOffd( m );
+      const HYPRE_Int num_cols = hypre_CSRMatrixNumCols( hypre_ParCSRMatrixOffd( m ) );
+      return { col_map_offd, num_cols };
+   }
+
+   //! \brief Constructs a local matrix by merging the diagonal and off-diagonal blocks.
+   HypreCSRMatrix
+   getMergedLocalMatrix() const
+   {
+      hypre_CSRMatrix* local_A = hypre_MergeDiagAndOffd( m );
+      return HypreCSRMatrix( local_A, true );
+   }
+
+   /**
+    * \brief Convert Hypre's format to \e HypreParCSRMatrix
+    *
+    * \param take_ownership indicates if the matrix should take ownership of
+    * the handle, i.e. whether to call \e hypre_CSRMatrixDestroy when it does
+    * not need it anymore.
+    */
+   void
+   bind( hypre_ParCSRMatrix* handle, bool take_ownership = true )
+   {
+      // drop/deallocate the current data
+      reset();
+
+      // take ownership of the handle
+      m = handle;
+      owns_handle = take_ownership;
+   }
+
+   //! \brief Reset the matrix to empty state.
+   void
+   reset()
+   {
+      if( owns_handle && m != nullptr ) {
+         if( ! owns_diag )
+            // hypreParCSRMatrixDestroy does not work when diag is a nullptr
+            hypre_ParCSRMatrixDiag( m ) = hypre_CSRMatrixCreate( 0, 0, 0 );
+         if( ! owns_offd )
+            // hypreParCSRMatrixDestroy does not work when offd is a nullptr
+            hypre_ParCSRMatrixOffd( m ) = hypre_CSRMatrixCreate( 0, 0, 0 );
+         if( ! owns_col_map_offd )
+            hypre_ParCSRMatrixColMapOffd( m ) = nullptr;
+         hypre_ParCSRMatrixDestroy( m );
+         m = nullptr;
+      }
+      else
+         m = nullptr;
+      owns_handle = true;
+      owns_diag = true;
+      owns_offd = true;
+      owns_col_map_offd = true;
+   }
+
+protected:
+   hypre_ParCSRMatrix* m = nullptr;
+   bool owns_handle = true;
+   bool owns_diag = true;
+   bool owns_offd = true;
+   bool owns_col_map_offd = true;
+};
+
+}  // namespace Matrices
+}  // namespace TNL
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Solvers/Linear/Hypre.h b/src/TNL/Solvers/Linear/Hypre.h
new file mode 100644
index 0000000000000000000000000000000000000000..229da24f65e2695e51c2c4876769faf7d3992d52
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Hypre.h
@@ -0,0 +1,771 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include <TNL/Containers/HypreParVector.h>
+   #include <TNL/Matrices/HypreParCSRMatrix.h>
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+
+//! \addtogroup Hypre
+//! @{
+
+//! \brief Abstract class for Hypre's solvers and preconditioners
+class HypreSolver
+{
+public:
+   //! \brief How to treat errors returned by Hypre function calls
+   enum ErrorMode
+   {
+      IGNORE_HYPRE_ERRORS,  ///< Ignore Hypre errors
+      WARN_HYPRE_ERRORS,    ///< Issue warnings on Hypre errors
+      ABORT_HYPRE_ERRORS    ///< Abort on Hypre errors (default in base class)
+   };
+
+protected:
+   //! Handle for the Hypre solver
+   HYPRE_Solver solver = nullptr;
+
+   //! The linear system matrix
+   const Matrices::HypreParCSRMatrix* A = nullptr;
+
+   //! Indicates if Hypre's setup function was already called
+   mutable bool setup_called = false;
+
+   //! How to treat Hypre errors
+   mutable ErrorMode error_mode = ABORT_HYPRE_ERRORS;
+
+   //! Hook function that is called at the end of \ref solve
+   virtual void
+   postSolveHook() const
+   {}
+
+public:
+   HypreSolver() = default;
+
+   explicit HypreSolver( const Matrices::HypreParCSRMatrix& A );
+
+   //! Type-cast to \e HYPRE_Solver
+   virtual operator HYPRE_Solver() const
+   {
+      return solver;
+   }
+
+   //! Hypre's internal setup function
+   virtual HYPRE_PtrToParSolverFcn
+   setupFcn() const = 0;
+
+   //! Hypre's internal solve function
+   virtual HYPRE_PtrToParSolverFcn
+   solveFcn() const = 0;
+
+   /**
+    * \brief Set the matrix of the linear system to be solved.
+    *
+    * This function also resets the current state of the solver causing the
+    * Hypre's internal setup function to be called before the solve function.
+    */
+   virtual void setMatrix( const Matrices::HypreParCSRMatrix& op )
+   {
+      A = &op;
+      setup_called = false;
+   }
+
+   //! Solve the linear system Ax=b
+   virtual void
+   solve( const Containers::HypreParVector& b, Containers::HypreParVector& x ) const;
+
+   /**
+    * \brief Set the behavior for treating Hypre errors, see the \ref ErrorMode
+    * enum. The default mode in the base class is \ref ABORT_HYPRE_ERRORS.
+    */
+   void
+   setErrorMode( ErrorMode err_mode ) const
+   {
+      error_mode = err_mode;
+   }
+
+   virtual ~HypreSolver() = default;
+};
+
+/**
+ * \brief Wrapper for the PCG solver in Hypre
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HyprePCG solver;
+ * HYPRE_PCGSetTol(solver, 1e-7);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HyprePCG : public HypreSolver
+{
+private:
+   HypreSolver* precond = nullptr;
+
+public:
+   explicit HyprePCG( MPI_Comm comm );
+
+   explicit HyprePCG( const Matrices::HypreParCSRMatrix& A );
+
+   ~HyprePCG() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   //! Set the Hypre solver to be used as a preconditioner
+   void
+   setPreconditioner( HypreSolver& precond );
+
+   void
+   setTol( double tol )
+   {
+      HYPRE_PCGSetTol( solver, tol );
+   }
+
+   void
+   setAbsTol( double atol )
+   {
+      HYPRE_PCGSetAbsoluteTol( solver, atol );
+   }
+
+   void
+   setMaxIter( int max_iter )
+   {
+      HYPRE_PCGSetMaxIter( solver, max_iter );
+   }
+
+   void
+   setPrintLevel( int print_level )
+   {
+      HYPRE_PCGSetPrintLevel( solver, print_level );
+   }
+
+   /**
+    * \brief Use the L2 norm of the residual for measuring PCG convergence,
+    * plus (optionally) 1) periodically recompute true residuals from scratch;
+    * and 2) enable residual-based stopping criteria.
+    */
+   void
+   setResidualConvergenceOptions( int res_frequency = -1, double rtol = 0.0 );
+
+   HYPRE_Int
+   getNumIterations() const
+   {
+      HYPRE_Int num_it;
+      HYPRE_ParCSRPCGGetNumIterations( solver, &num_it );
+      return num_it;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRPCGSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRPCGSolve;
+   }
+
+protected:
+   void
+   postSolveHook() const override;
+};
+
+/**
+ * \brief Wrapper for the BiCGSTAB solver in Hypre
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreBiCGSTAB solver;
+ * HYPRE_BiCGSTABSetTol(solver, 1e-7);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreBiCGSTAB : public HypreSolver
+{
+private:
+   HypreSolver* precond = nullptr;
+
+   // Hypre does not provide a way to query this from the BiCGSTAB solver
+   // https://github.com/hypre-space/hypre/issues/627
+   HYPRE_Int print_level = 0;
+
+public:
+   explicit HypreBiCGSTAB( MPI_Comm comm );
+
+   explicit HypreBiCGSTAB( const Matrices::HypreParCSRMatrix& A_ );
+
+   ~HypreBiCGSTAB() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   //! Set the Hypre solver to be used as a preconditioner
+   void
+   setPreconditioner( HypreSolver& precond );
+
+   void
+   setTol( double tol )
+   {
+      HYPRE_BiCGSTABSetTol( solver, tol );
+   }
+
+   void
+   setAbsTol( double atol )
+   {
+      HYPRE_BiCGSTABSetAbsoluteTol( solver, atol );
+   }
+
+   void
+   setMaxIter( int max_iter )
+   {
+      HYPRE_BiCGSTABSetMaxIter( solver, max_iter );
+   }
+
+   void
+   setPrintLevel( int print_level )
+   {
+      HYPRE_BiCGSTABSetPrintLevel( solver, print_level );
+      this->print_level = print_level;
+   }
+
+   HYPRE_Int
+   getNumIterations() const
+   {
+      HYPRE_Int num_it;
+      HYPRE_ParCSRBiCGSTABGetNumIterations( solver, &num_it );
+      return num_it;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRBiCGSTABSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRBiCGSTABSolve;
+   }
+
+protected:
+   void
+   postSolveHook() const override;
+};
+
+/**
+ * \brief Wrapper for the GMRES solver in Hypre
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreGMRES solver;
+ * HYPRE_GMRESSetTol(solver, 1e-7);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreGMRES : public HypreSolver
+{
+private:
+   HypreSolver* precond = nullptr;
+
+public:
+   explicit HypreGMRES( MPI_Comm comm );
+
+   explicit HypreGMRES( const Matrices::HypreParCSRMatrix& A_ );
+
+   ~HypreGMRES() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   //! Set the Hypre solver to be used as a preconditioner
+   void
+   setPreconditioner( HypreSolver& precond );
+
+   void
+   setTol( double tol )
+   {
+      HYPRE_GMRESSetTol( solver, tol );
+   }
+
+   void
+   setAbsTol( double tol )
+   {
+      HYPRE_GMRESSetAbsoluteTol( solver, tol );
+   }
+
+   void
+   setMaxIter( int max_iter )
+   {
+      HYPRE_GMRESSetMaxIter( solver, max_iter );
+   }
+
+   void
+   setKDim( int k_dim )
+   {
+      HYPRE_GMRESSetKDim( solver, k_dim );
+   }
+
+   void
+   setPrintLevel( int print_level )
+   {
+      HYPRE_GMRESSetPrintLevel( solver, print_level );
+   }
+
+   HYPRE_Int
+   getNumIterations() const
+   {
+      HYPRE_Int num_it;
+      HYPRE_ParCSRGMRESGetNumIterations( solver, &num_it );
+      return num_it;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRGMRESSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRGMRESSolve;
+   }
+
+protected:
+   void
+   postSolveHook() const override;
+};
+
+/**
+ * \brief Wrapper for the Flexible GMRES solver in Hypre
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreFlexGMRES solver;
+ * HYPRE_FlexGMRESSetTol(solver, 1e-7);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreFlexGMRES : public HypreSolver
+{
+private:
+   HypreSolver* precond = nullptr;
+
+public:
+   explicit HypreFlexGMRES( MPI_Comm comm );
+
+   explicit HypreFlexGMRES( const Matrices::HypreParCSRMatrix& A_ );
+
+   ~HypreFlexGMRES() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   //! Set the Hypre solver to be used as a preconditioner
+   void
+   setPreconditioner( HypreSolver& precond );
+
+   void
+   setTol( double tol )
+   {
+      HYPRE_FlexGMRESSetTol( solver, tol );
+   }
+
+   void
+   setMaxIter( int max_iter )
+   {
+      HYPRE_FlexGMRESSetMaxIter( solver, max_iter );
+   }
+
+   void
+   setKDim( int k_dim )
+   {
+      HYPRE_FlexGMRESSetKDim( solver, k_dim );
+   }
+
+   void
+   setPrintLevel( int print_level )
+   {
+      HYPRE_FlexGMRESSetPrintLevel( solver, print_level );
+   }
+
+   HYPRE_Int
+   getNumIterations() const
+   {
+      HYPRE_Int num_it;
+      HYPRE_ParCSRFlexGMRESGetNumIterations( solver, &num_it );
+      return num_it;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRFlexGMRESSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRFlexGMRESSolve;
+   }
+
+protected:
+   void
+   postSolveHook() const override;
+};
+
+//! \brief Wrapper for the identity operator as a Hypre solver
+class HypreIdentity : public HypreSolver
+{
+public:
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) hypre_ParKrylovIdentitySetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) hypre_ParKrylovIdentity;
+   }
+};
+
+//! \brief Wrapper for the Jacobi preconditioner in Hypre
+class HypreDiagScale : public HypreSolver
+{
+public:
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRDiagScaleSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSRDiagScale;
+   }
+};
+
+/**
+ * \brief Wrapper for Hypre's preconditioner that is intended for matrices that
+ * are triangular in some ordering.
+ *
+ * Finds correct ordering and performs forward substitution on processor as
+ * approximate inverse. Exact on one processor.
+ */
+class HypreTriSolve : public HypreSolver
+{
+public:
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSROnProcTriSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParCSROnProcTriSolve;
+   }
+};
+
+/**
+ * \brief Wrapper for the ParaSails preconditioner in Hypre
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreParaSails precond;
+ * HYPRE_ParaSailsSetSym(precond, 1);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreParaSails : public HypreSolver
+{
+private:
+   //! \brief Reset the preconditioner to the default state
+   void
+   reset( MPI_Comm comm );
+
+public:
+   explicit HypreParaSails( MPI_Comm comm );
+
+   explicit HypreParaSails( const Matrices::HypreParCSRMatrix& A );
+
+   ~HypreParaSails() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParaSailsSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ParaSailsSolve;
+   }
+};
+
+/**
+ * \brief Wrapper for the Euclid preconditioner in Hypre
+ *
+ * Euclid implements the Parallel Incomplete LU factorization technique. For
+ * more information see:
+ * "A Scalable Parallel Algorithm for Incomplete Factor Preconditioning" by
+ * David Hysom and Alex Pothen, https://doi.org/10.1137/S1064827500376193
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreEuclid precond;
+ * HYPRE_EuclidSetLevel(precond, 2);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreEuclid : public HypreSolver
+{
+private:
+   //! \brief Reset the preconditioner to the default state
+   void
+   reset( MPI_Comm comm );
+
+public:
+   explicit HypreEuclid( MPI_Comm comm );
+
+   explicit HypreEuclid( const Matrices::HypreParCSRMatrix& A );
+
+   ~HypreEuclid() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_EuclidSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_EuclidSolve;
+   }
+};
+
+/**
+ * \brief Wrapper for Hypre's native parallel ILU preconditioner.
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreILU precond;
+ * HYPRE_ILUSetLevelOfFill(precond, 1);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreILU : public HypreSolver
+{
+private:
+   //! \brief Set the ILU default options
+   void
+   setDefaultOptions();
+
+   //! \brief Reset the preconditioner to the default state (including default options).
+   void
+   reset();
+
+public:
+   HypreILU();
+
+   explicit HypreILU( const Matrices::HypreParCSRMatrix& A );
+
+   ~HypreILU() override;
+
+   //! Set the print level: 0 = none, 1 = setup, 2 = solve, 3 = setup+solve
+   void
+   setPrintLevel( HYPRE_Int print_level )
+   {
+      HYPRE_ILUSetPrintLevel( solver, print_level );
+   }
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ILUSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_ILUSolve;
+   }
+};
+
+/** \brief Wrapper for the BoomerAMG solver/preconditioner in Hypre
+ *
+ * Note that the wrapper class sets its own default options that are different
+ * from Hypre. By default, the instance is set up for preconditioner use, i.e.
+ * with zero tolerance and one V cycle. At least these two parameters need to
+ * be changed when used as a solver.
+ *
+ * Parameters can be set using native Hypre functions, e.g.
+ *
+ * \code
+ * TNL::Solvers::Linear::HypreBoomerAMG solver;
+ * HYPRE_BoomerAMGSetTol(solver, 1e-7);
+ * HYPRE_BoomerAMGSetMaxIter(solver, 20);
+ * \endcode
+ *
+ * See the [Hypre Reference Manual][manual] for the available parameters.
+ *
+ * [manual]: https://hypre.readthedocs.io/_/downloads/en/latest/pdf/
+ */
+class HypreBoomerAMG : public HypreSolver
+{
+private:
+   //! \brief Default, generally robust, BoomerAMG options
+   void
+   setDefaultOptions();
+
+   //! \brief Reset the preconditioner to the default state (including default options).
+   void
+   reset();
+
+public:
+   HypreBoomerAMG();
+
+   explicit HypreBoomerAMG( const Matrices::HypreParCSRMatrix& A );
+
+   ~HypreBoomerAMG() override;
+
+   void
+   setMatrix( const Matrices::HypreParCSRMatrix& op ) override;
+
+   //! \brief More robust options for systems of PDEs
+   void
+   setSystemsOptions( int dim, bool order_bynodes = false );
+
+   /**
+    * \brief Set parameters to use AIR AMG solver for advection-dominated problems.
+    *
+    * See "Nonsymmetric Algebraic Multigrid Based on Local Approximate Ideal
+    * Restriction (AIR)," Manteuffel, Ruge, Southworth, SISC (2018),
+    * DOI:/10.1137/17M1144350.
+    *
+    * \param restrict_type Defines which parallel restriction operator is used.
+    *                      There are the following options:
+    *                      0: transpose of the interpolation operator
+    *                      1: AIR-1 - Approximate Ideal Restriction (distance 1)
+    *                      2: AIR-2 - Approximate Ideal Restriction (distance 2)
+    * \param relax_order Defines in which order the points are relaxed. There
+    *                    are the following options:
+    *                    0: the points are relaxed in natural or lexicographic
+    *                       order on each processor
+    *                    1: CF-relaxation is used
+    */
+   void
+   setAdvectiveOptions( int restrict_type = 0, int relax_order = 1 );
+
+   //! Set the print level: 0 = none, 1 = setup, 2 = solve, 3 = setup+solve
+   void
+   setPrintLevel( int print_level )
+   {
+      HYPRE_BoomerAMGSetPrintLevel( solver, print_level );
+   }
+
+   void
+   setMaxIter( int max_iter )
+   {
+      HYPRE_BoomerAMGSetMaxIter( solver, max_iter );
+   }
+
+   void
+   setTol( double tol )
+   {
+      HYPRE_BoomerAMGSetTol( solver, tol );
+   }
+
+   HYPRE_Int
+   getNumIterations() const
+   {
+      HYPRE_Int num_it;
+      HYPRE_BoomerAMGGetNumIterations( solver, &num_it );
+      return num_it;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   setupFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_BoomerAMGSetup;
+   }
+
+   HYPRE_PtrToParSolverFcn
+   solveFcn() const override
+   {
+      return (HYPRE_PtrToParSolverFcn) HYPRE_BoomerAMGSolve;
+   }
+
+protected:
+   void
+   postSolveHook() const override;
+};
+
+}  // namespace Linear
+}  // namespace Solvers
+}  // namespace TNL
+
+   #include "Hypre.hpp"
+
+// this is a Doxygen end-group marker
+//! @}
+
+#endif  // HAVE_HYPRE
diff --git a/src/TNL/Solvers/Linear/Hypre.hpp b/src/TNL/Solvers/Linear/Hypre.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a394a30e945ea82497db26c98b1554ff987b5c29
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Hypre.hpp
@@ -0,0 +1,561 @@
+// Copyright (c) 2004-2022 Tomáš Oberhuber et al.
+//
+// This file is part of TNL - Template Numerical Library (https://tnl-project.org/)
+//
+// SPDX-License-Identifier: MIT
+
+// Implemented by: Jakub Klinkovský
+
+#pragma once
+
+#ifdef HAVE_HYPRE
+
+   #include "Hypre.h"
+
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+
+HypreSolver::HypreSolver( const Matrices::HypreParCSRMatrix& A )
+{
+   this->A = &A;
+}
+
+void
+HypreSolver::solve( const Containers::HypreParVector& b, Containers::HypreParVector& x ) const
+{
+   HYPRE_Int err_flag;
+   if( A == nullptr ) {
+      throw std::runtime_error( "HypreSolver::solve(...) : HypreParCSRMatrix A is missing" );
+   }
+
+   if( ! setup_called ) {
+      err_flag = setupFcn()( *this, *A, b, x );
+      if( err_flag != 0 ) {
+         if( error_mode == WARN_HYPRE_ERRORS )
+            std::cout << "Error during setup! Error code: " << err_flag << std::endl;
+         else if( error_mode == ABORT_HYPRE_ERRORS )
+            throw std::runtime_error( "Error during setup! Error code: " + std::to_string( err_flag ) );
+      }
+      hypre_error_flag = 0;
+      setup_called = true;
+   }
+
+   err_flag = solveFcn()( *this, *A, b, x );
+   if( err_flag != 0 ) {
+      if( error_mode == WARN_HYPRE_ERRORS )
+         std::cout << "Error during solve! Error code: " << err_flag << std::endl;
+      else if( error_mode == ABORT_HYPRE_ERRORS )
+         throw std::runtime_error( "Error during solve! Error code: " + std::to_string( err_flag ) );
+   }
+   hypre_error_flag = 0;
+
+   postSolveHook();
+}
+
+HyprePCG::HyprePCG( MPI_Comm comm )
+{
+   HYPRE_ParCSRPCGCreate( comm, &solver );
+}
+
+HyprePCG::HyprePCG( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A )
+{
+   const MPI_Comm comm = A.getCommunicator();
+   HYPRE_ParCSRPCGCreate( comm, &solver );
+}
+
+HyprePCG::~HyprePCG()
+{
+   HYPRE_ParCSRPCGDestroy( solver );
+}
+
+void
+HyprePCG::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   HypreSolver::setMatrix( op );
+   if( precond != nullptr )
+      precond->setMatrix( *A );
+}
+
+void
+HyprePCG::setPreconditioner( HypreSolver& precond_ )
+{
+   precond = &precond_;
+   HYPRE_ParCSRPCGSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ );
+}
+
+void
+HyprePCG::setResidualConvergenceOptions( int res_frequency, double rtol )
+{
+   HYPRE_PCGSetTwoNorm( solver, 1 );
+   if( res_frequency > 0 )
+      HYPRE_PCGSetRecomputeResidualP( solver, res_frequency );
+   if( rtol > 0.0 )
+      HYPRE_PCGSetResidualTol( solver, rtol );
+}
+
+void
+HyprePCG::postSolveHook() const
+{
+   HYPRE_Int print_level;
+   HYPRE_PCGGetPrintLevel( solver, &print_level );
+
+   if( print_level > 0 ) {
+      HYPRE_Int num_iterations;
+      HYPRE_PCGGetNumIterations( solver, &num_iterations );
+
+      double final_res_norm;
+      HYPRE_PCGGetFinalRelativeResidualNorm( solver, &final_res_norm );
+
+      const MPI_Comm comm = A->getCommunicator();
+      if( MPI::GetRank( comm ) == 0 ) {
+         std::cout << "PCG Iterations = " << num_iterations << std::endl
+                   << "Final PCG Relative Residual Norm = " << final_res_norm << std::endl;
+      }
+   }
+}
+
+HypreBiCGSTAB::HypreBiCGSTAB( MPI_Comm comm )
+{
+   HYPRE_ParCSRBiCGSTABCreate( comm, &solver );
+}
+
+HypreBiCGSTAB::HypreBiCGSTAB( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ )
+{
+   const MPI_Comm comm = A->getCommunicator();
+   HYPRE_ParCSRBiCGSTABCreate( comm, &solver );
+}
+
+HypreBiCGSTAB::~HypreBiCGSTAB()
+{
+   HYPRE_ParCSRBiCGSTABDestroy( solver );
+}
+
+void
+HypreBiCGSTAB::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   HypreSolver::setMatrix( op );
+   if( precond != nullptr )
+      precond->setMatrix( *A );
+}
+
+void
+HypreBiCGSTAB::setPreconditioner( HypreSolver& precond_ )
+{
+   precond = &precond_;
+
+   HYPRE_ParCSRBiCGSTABSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ );
+}
+
+void
+HypreBiCGSTAB::postSolveHook() const
+{
+   if( print_level > 0 ) {
+      HYPRE_Int num_iterations;
+      HYPRE_BiCGSTABGetNumIterations( solver, &num_iterations );
+
+      double final_res_norm;
+      HYPRE_BiCGSTABGetFinalRelativeResidualNorm( solver, &final_res_norm );
+
+      const MPI_Comm comm = A->getCommunicator();
+      if( MPI::GetRank( comm ) == 0 ) {
+         std::cout << "BiCGSTAB Iterations = " << num_iterations << std::endl
+                   << "Final BiCGSTAB Relative Residual Norm = " << final_res_norm << std::endl;
+      }
+   }
+}
+
+HypreGMRES::HypreGMRES( MPI_Comm comm )
+{
+   HYPRE_ParCSRGMRESCreate( comm, &solver );
+}
+
+HypreGMRES::HypreGMRES( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ )
+{
+   const MPI_Comm comm = A->getCommunicator();
+   HYPRE_ParCSRGMRESCreate( comm, &solver );
+}
+
+HypreGMRES::~HypreGMRES()
+{
+   HYPRE_ParCSRGMRESDestroy( solver );
+}
+
+void
+HypreGMRES::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   HypreSolver::setMatrix( op );
+   if( precond != nullptr )
+      precond->setMatrix( *A );
+}
+
+void
+HypreGMRES::setPreconditioner( HypreSolver& precond_ )
+{
+   precond = &precond_;
+   HYPRE_ParCSRGMRESSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ );
+}
+
+void
+HypreGMRES::postSolveHook() const
+{
+   HYPRE_Int print_level;
+   HYPRE_GMRESGetPrintLevel( solver, &print_level );
+
+   if( print_level > 0 ) {
+      HYPRE_Int num_iterations;
+      HYPRE_GMRESGetNumIterations( solver, &num_iterations );
+
+      double final_res_norm;
+      HYPRE_GMRESGetFinalRelativeResidualNorm( solver, &final_res_norm );
+
+      const MPI_Comm comm = A->getCommunicator();
+      if( MPI::GetRank( comm ) == 0 ) {
+         std::cout << "GMRES Iterations = " << num_iterations << std::endl
+                   << "Final GMRES Relative Residual Norm = " << final_res_norm << std::endl;
+      }
+   }
+}
+
+HypreFlexGMRES::HypreFlexGMRES( MPI_Comm comm )
+{
+   HYPRE_ParCSRFlexGMRESCreate( comm, &solver );
+}
+
+HypreFlexGMRES::HypreFlexGMRES( const Matrices::HypreParCSRMatrix& A_ ) : HypreSolver( A_ )
+{
+   const MPI_Comm comm = A->getCommunicator();
+   HYPRE_ParCSRFlexGMRESCreate( comm, &solver );
+}
+
+HypreFlexGMRES::~HypreFlexGMRES()
+{
+   HYPRE_ParCSRFlexGMRESDestroy( solver );
+}
+
+void
+HypreFlexGMRES::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   HypreSolver::setMatrix( op );
+   if( precond != nullptr )
+      precond->setMatrix( *A );
+}
+
+void
+HypreFlexGMRES::setPreconditioner( HypreSolver& precond_ )
+{
+   precond = &precond_;
+   HYPRE_ParCSRFlexGMRESSetPrecond( solver, precond_.solveFcn(), precond_.setupFcn(), precond_ );
+}
+
+void
+HypreFlexGMRES::postSolveHook() const
+{
+   HYPRE_Int print_level;
+   HYPRE_FlexGMRESGetPrintLevel( solver, &print_level );
+
+   if( print_level > 0 ) {
+      HYPRE_Int num_iterations;
+      HYPRE_FlexGMRESGetNumIterations( solver, &num_iterations );
+
+      double final_res_norm;
+      HYPRE_FlexGMRESGetFinalRelativeResidualNorm( solver, &final_res_norm );
+
+      const MPI_Comm comm = A->getCommunicator();
+      if( MPI::GetRank( comm ) == 0 ) {
+         std::cout << "FlexGMRES Iterations = " << num_iterations << std::endl
+                   << "Final FlexGMRES Relative Residual Norm = " << final_res_norm << std::endl;
+      }
+   }
+}
+
+HypreParaSails::HypreParaSails( MPI_Comm comm )
+{
+   reset( comm );
+}
+
+HypreParaSails::HypreParaSails( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A )
+{
+   reset( A.getCommunicator() );
+}
+
+HypreParaSails::~HypreParaSails()
+{
+   HYPRE_ParaSailsDestroy( solver );
+}
+
+void
+HypreParaSails::reset( MPI_Comm comm )
+{
+   if( solver != nullptr )
+      HYPRE_ParaSailsDestroy( solver );
+   HYPRE_ParaSailsCreate( comm, &solver );
+}
+
+void
+HypreParaSails::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   if( A != nullptr )
+      reset( A->getCommunicator() );
+   HypreSolver::setMatrix( op );
+}
+
+HypreEuclid::HypreEuclid( MPI_Comm comm )
+{
+   HYPRE_EuclidCreate( comm, &solver );
+}
+
+HypreEuclid::HypreEuclid( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A )
+{
+   HYPRE_EuclidCreate( A.getCommunicator(), &solver );
+}
+
+HypreEuclid::~HypreEuclid()
+{
+   HYPRE_EuclidDestroy( solver );
+}
+
+void
+HypreEuclid::reset( MPI_Comm comm )
+{
+   if( solver != nullptr )
+      HYPRE_EuclidDestroy( solver );
+   HYPRE_EuclidCreate( comm, &solver );
+}
+
+void
+HypreEuclid::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   if( A != nullptr )
+      reset( A->getCommunicator() );
+   HypreSolver::setMatrix( op );
+}
+
+HypreILU::HypreILU()
+{
+   reset();
+}
+
+HypreILU::HypreILU( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A )
+{
+   reset();
+}
+
+HypreILU::~HypreILU()
+{
+   HYPRE_ILUDestroy( solver );
+}
+
+void
+HypreILU::setDefaultOptions()
+{
+   // Maximum iterations; 1 iter for preconditioning
+   HYPRE_ILUSetMaxIter( solver, 1 );
+
+   // The tolerance when used as a smoother; set to 0.0 for preconditioner
+   HYPRE_ILUSetTol( solver, 0.0 );
+
+   // The type of incomplete LU used locally and globally (see class doc)
+   HYPRE_ILUSetType( solver, 0 );  // 0: ILU(k) locally and block Jacobi globally
+
+   // Fill level 'k' for ILU(k)
+   HYPRE_ILUSetLevelOfFill( solver, 0 );
+
+   // Local reordering scheme; 0 = no reordering, 1 = reverse Cuthill-McKee
+   HYPRE_ILUSetLocalReordering( solver, 1 );
+}
+
+void
+HypreILU::reset()
+{
+   if( solver != nullptr )
+      HYPRE_ILUDestroy( solver );
+   HYPRE_ILUCreate( &solver );
+   setDefaultOptions();
+}
+
+void
+HypreILU::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   reset();
+   HypreSolver::setMatrix( op );
+}
+
+HypreBoomerAMG::HypreBoomerAMG()
+{
+   reset();
+}
+
+HypreBoomerAMG::HypreBoomerAMG( const Matrices::HypreParCSRMatrix& A ) : HypreSolver( A )
+{
+   reset();
+}
+
+HypreBoomerAMG::~HypreBoomerAMG()
+{
+   HYPRE_BoomerAMGDestroy( solver );
+}
+
+void
+HypreBoomerAMG::setDefaultOptions()
+{
+   #if ! defined( HYPRE_USING_GPU )
+   // AMG coarsening options:
+   const int coarsen_type = 10;  // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP
+   const int agg_levels = 1;     // number of aggressive coarsening levels
+   const double theta = 0.25;    // strength threshold: 0.25, 0.5, 0.8
+
+   // AMG interpolation options:
+   const int interp_type = 6;  // 6 = extended+i, 0 = classical
+   const int Pmax = 4;         // max number of elements per row in P
+
+   // AMG relaxation options:
+   const int relax_type = 8;    // 8 = l1-GS, 6 = symm. GS, 3 = GS, 18 = l1-Jacobi
+   const int relax_sweeps = 1;  // relaxation sweeps on each level
+
+   // Additional options:
+   const int print_level = 0;  // 0 = none, 1 = setup, 2 = solve, 3 = setup+solve
+   const int max_levels = 25;  // max number of levels in AMG hierarchy
+   #else
+   // AMG coarsening options:
+   const int coarsen_type = 8;  // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP
+   const int agg_levels = 0;    // number of aggressive coarsening levels
+   const double theta = 0.25;   // strength threshold: 0.25, 0.5, 0.8
+
+   // AMG interpolation options:
+   const int interp_type = 6;  // 6 = extended+i, or 18 = extended+e
+   const int Pmax = 4;         // max number of elements per row in P
+
+   // AMG relaxation options:
+   const int relax_type = 18;   // 18 = l1-Jacobi, or 16 = Chebyshev
+   const int relax_sweeps = 1;  // relaxation sweeps on each level
+
+   // Additional options:
+   const int print_level = 0;  // 0 = none, 1 = setup, 2 = solve, 3 = setup+solve
+   const int max_levels = 25;  // max number of levels in AMG hierarchy
+   #endif
+
+   HYPRE_BoomerAMGSetCoarsenType( solver, coarsen_type );
+   HYPRE_BoomerAMGSetAggNumLevels( solver, agg_levels );
+   HYPRE_BoomerAMGSetRelaxType( solver, relax_type );
+   HYPRE_BoomerAMGSetNumSweeps( solver, relax_sweeps );
+   HYPRE_BoomerAMGSetStrongThreshold( solver, theta );
+   HYPRE_BoomerAMGSetInterpType( solver, interp_type );
+   HYPRE_BoomerAMGSetPMaxElmts( solver, Pmax );
+   HYPRE_BoomerAMGSetPrintLevel( solver, print_level );
+   HYPRE_BoomerAMGSetMaxLevels( solver, max_levels );
+
+   // Use as a preconditioner (one V-cycle, zero tolerance)
+   HYPRE_BoomerAMGSetMaxIter( solver, 1 );
+   HYPRE_BoomerAMGSetTol( solver, 0.0 );
+}
+
+void
+HypreBoomerAMG::reset()
+{
+   if( solver != nullptr )
+      HYPRE_BoomerAMGDestroy( solver );
+   HYPRE_BoomerAMGCreate( &solver );
+   setDefaultOptions();
+}
+
+void
+HypreBoomerAMG::setMatrix( const Matrices::HypreParCSRMatrix& op )
+{
+   reset();
+   HypreSolver::setMatrix( op );
+}
+
+void
+HypreBoomerAMG::setSystemsOptions( int dim, bool order_bynodes )
+{
+   HYPRE_BoomerAMGSetNumFunctions( solver, dim );
+
+   // The default "system" ordering in Hypre is byVDIM. When using byNODES
+   // ordering, we have to specify the ordering explicitly.
+   if( order_bynodes ) {
+      TNL_ASSERT_TRUE( A->getRows() % dim == 0, "Ordering does not work as claimed!" );
+      const HYPRE_Int nnodes = A->getRows() / dim;
+
+      // generate DofFunc mapping on the host
+      HYPRE_Int* h_mapping = hypre_CTAlloc( HYPRE_Int, A->getRows(), HYPRE_MEMORY_HOST );
+      HYPRE_Int k = 0;
+      for( int i = 0; i < dim; i++ )
+         for( HYPRE_Int j = 0; j < nnodes; j++ )
+            h_mapping[ k++ ] = i;
+
+   #if defined( HYPRE_USING_GPU )
+      // the mapping is assumed to be a device pointer
+      HYPRE_Int* mapping = hypre_CTAlloc( HYPRE_Int, A->getRows(), HYPRE_MEMORY_DEVICE );
+      hypre_TMemcpy( mapping, h_mapping, HYPRE_Int, A->getRows(), HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST );
+      hypre_TFree( h_mapping, HYPRE_MEMORY_HOST );
+   #else
+      HYPRE_Int* mapping = h_mapping;
+   #endif
+
+      // Hypre takes ownership and frees the pointer in HYPRE_BoomerAMGDestroy
+      HYPRE_BoomerAMGSetDofFunc( solver, mapping );
+   }
+
+   // more robust options with respect to convergence
+   HYPRE_BoomerAMGSetAggNumLevels( solver, 0 );
+   HYPRE_BoomerAMGSetStrongThreshold( solver, 0.5 );
+}
+
+void
+HypreBoomerAMG::setAdvectiveOptions( int restrict_type, int relax_order )
+{
+   HYPRE_BoomerAMGSetRestriction( solver, restrict_type );
+   HYPRE_BoomerAMGSetRelaxOrder( solver, relax_order );
+
+   const int interp_type = 100;
+   const int relax_type = 8;
+   const int coarsen_type = 6;
+   const double strength_tolC = 0.1;
+   const double strength_tolR = 0.01;
+   const double filter_tolR = 0.0;
+   const double filterA_tol = 0.0;
+
+   HYPRE_BoomerAMGSetInterpType( solver, interp_type );
+   HYPRE_BoomerAMGSetRelaxType( solver, relax_type );
+   HYPRE_BoomerAMGSetCoarsenType( solver, coarsen_type );
+   HYPRE_BoomerAMGSetStrongThreshold( solver, strength_tolC );
+   HYPRE_BoomerAMGSetStrongThresholdR( solver, strength_tolR );
+   HYPRE_BoomerAMGSetFilterThresholdR( solver, filter_tolR );
+   HYPRE_BoomerAMGSetADropTol( solver, filterA_tol );
+   // type = -1: drop based on row inf-norm
+   HYPRE_BoomerAMGSetADropType( solver, -1 );
+
+   // disable aggressive coarsening
+   HYPRE_BoomerAMGSetAggNumLevels( solver, 0 );
+
+   // set number of sweeps (up and down cycles, and the coarsest level)
+   HYPRE_BoomerAMGSetNumSweeps( solver, 1 );
+}
+
+void
+HypreBoomerAMG::postSolveHook() const
+{
+   HYPRE_Int print_level;
+   HYPRE_BoomerAMGGetPrintLevel( solver, &print_level );
+
+   if( print_level > 1 ) {
+      HYPRE_Int num_iterations;
+      HYPRE_BoomerAMGGetNumIterations( solver, &num_iterations );
+
+      double final_res_norm;
+      HYPRE_BoomerAMGGetFinalRelativeResidualNorm( solver, &final_res_norm );
+
+      const MPI_Comm comm = A->getCommunicator();
+      if( MPI::GetRank( comm ) == 0 ) {
+         std::cout << "BoomerAMG Iterations = " << num_iterations << std::endl
+                   << "Final BoomerAMG Relative Residual Norm = " << final_res_norm << std::endl;
+      }
+   }
+}
+
+}  // namespace Linear
+}  // namespace Solvers
+}  // namespace TNL
+
+#endif  // HAVE_HYPRE
diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt
index 0deb077d307abfa4356f1f7ec956a5aa599f3da3..84b2ceeaae2e37540e3a34618c5c5f19e07e6497 100644
--- a/src/UnitTests/CMakeLists.txt
+++ b/src/UnitTests/CMakeLists.txt
@@ -54,3 +54,23 @@ if( ZLIB_FOUND )
       add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} )
    endforeach()
 endif()
+
+
+if( ${BUILD_MPI} )
+   find_package(Hypre)
+
+   if( ${HYPRE_FOUND} )
+      foreach( target IN ITEMS HypreTest )
+         add_executable( ${target} ${target}.cpp )
+         target_compile_definitions( ${target} PUBLIC "-DHAVE_HYPRE" )
+         target_compile_options( ${target} PRIVATE ${CXX_TESTS_FLAGS} )
+         target_include_directories( ${target} PUBLIC ${HYPRE_INCLUDE_DIRS} )
+         target_link_libraries( ${target} ${TESTS_LIBRARIES} ${HYPRE_LIBRARIES} )
+         target_link_options( ${target} PRIVATE ${TESTS_LINKER_FLAGS} )
+
+         set( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX}" )
+         add_test( NAME ${target} COMMAND "mpirun" ${mpi_test_parameters})
+         add_test( ${target}_nodistr ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} )
+      endforeach()
+   endif()
+endif()
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index b6fa763a9c885a43074cbd15d73cdff76919574d..19186feb3b7f4bad60153f6943b02c09d5880a10 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -35,6 +35,12 @@ struct MyData
    // operator used in tests, not necessary for Array to work
    template< typename T >
    bool operator==( T v ) const { return data == v; }
+
+   // operator used in ArrayIO::loadSubrange (due to casting requested from the tests)
+   operator double() const
+   {
+      return data;
+   }
 };
 
 std::ostream& operator<<( std::ostream& str, const MyData& v )
@@ -646,6 +652,48 @@ TYPED_TEST( ArrayTest, SaveAndLoad )
    EXPECT_EQ( std::remove( TEST_FILE_NAME ), 0 );
 }
 
+TYPED_TEST( ArrayTest, SaveAndLoadSubrangeWithCast )
+{
+   using ArrayType = typename TestFixture::ArrayType;
+   using Value = typename ArrayType::ValueType;
+   using Index = typename ArrayType::IndexType;
+   using namespace TNL::Containers::detail;
+
+   ArrayType v;
+   v.setSize( 100 );
+   for( int i = 0; i < 100; i ++ )
+      v.setElement( i, i );
+   ASSERT_NO_THROW( File( TEST_FILE_NAME, std::ios_base::out ) << v );
+
+   const int offset = 25;
+   const int subrangeSize = 50;
+   using CastValue = short int;
+   Array< CastValue, typename ArrayType::DeviceType, long > array;
+   array.setSize( subrangeSize );
+   File file( TEST_FILE_NAME, std::ios_base::in );
+   {
+      // read type
+      const std::string type = getObjectType( file );
+      ASSERT_EQ( type, ArrayType::getSerializationType() );
+      // read size
+      std::size_t elementsInFile;
+      file.load( &elementsInFile );
+      EXPECT_EQ( elementsInFile, static_cast< std::size_t >( v.getSize() ) );
+      // read data, cast from Value to short int
+      using IO = ArrayIO< CastValue, Index, typename Allocators::Default< typename ArrayType::DeviceType >::template Allocator< CastValue > >;
+      // hack for the test...
+      if( getType< Value >() == "MyData" )
+         IO::loadSubrange( file, elementsInFile, offset, array.getData(), array.getSize(), "double" );
+      else
+         IO::loadSubrange( file, elementsInFile, offset, array.getData(), array.getSize(), getType< Value >() );
+   }
+   for( Index i = 0; i < subrangeSize; i++ ) {
+      EXPECT_EQ( array.getElement( i ), offset + i );
+   }
+
+   EXPECT_EQ( std::remove( TEST_FILE_NAME ), 0 );
+}
+
 TYPED_TEST( ArrayTest, LoadViaView )
 {
    using ArrayType = typename TestFixture::ArrayType;
diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h
index 7af5fb286ee1f831b2db5c98d0acd119c5ec60ab..dee033c5e1549b42922407ef181f13781dc9e735 100644
--- a/src/UnitTests/FileTest.h
+++ b/src/UnitTests/FileTest.h
@@ -18,11 +18,13 @@ TEST( FileTest, WriteAndRead )
    File file;
    ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out ) );
 
-   int intData( 5 );
+   int intData = 5;
    double doubleData[ 3 ] = { 1.0, 2.0, 3.0 };
+   const int ignoredValue = 42;
    const double constDoubleData = 3.14;
    ASSERT_NO_THROW( file.save( &intData ) );
    ASSERT_NO_THROW( file.save( doubleData, 3 ) );
+   ASSERT_NO_THROW( file.save( &ignoredValue ) );
    ASSERT_NO_THROW( file.save( &constDoubleData ) );
    ASSERT_NO_THROW( file.close() );
 
@@ -32,6 +34,7 @@ TEST( FileTest, WriteAndRead )
    double newConstDoubleData;
    ASSERT_NO_THROW( file.load( &newIntData, 1 ) );
    ASSERT_NO_THROW( file.load( newDoubleData, 3 ) );
+   ASSERT_NO_THROW( file.ignore< decltype(ignoredValue) >( 1 ) );
    ASSERT_NO_THROW( file.load( &newConstDoubleData, 1 ) );
 
    EXPECT_EQ( newIntData, intData );
diff --git a/src/UnitTests/HypreTest.cpp b/src/UnitTests/HypreTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..45d1c5d2d8d5e884d62020a04f844bf92d2d572a
--- /dev/null
+++ b/src/UnitTests/HypreTest.cpp
@@ -0,0 +1,490 @@
+#ifdef HAVE_GTEST
+
+#include <gtest/gtest.h>
+#include "Containers/VectorHelperFunctions.h"
+
+#ifdef HAVE_HYPRE
+
+#include <TNL/Containers/HypreVector.h>
+#include <TNL/Containers/HypreParVector.h>
+#include <TNL/Matrices/HypreCSRMatrix.h>
+#include <TNL/Matrices/HypreParCSRMatrix.h>
+#include <TNL/Solvers/Linear/Hypre.h>
+
+#include <TNL/Containers/Partitioner.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+using namespace TNL;
+using namespace TNL::Solvers::Linear;
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, Vector )
+{
+   TNL::Hypre hypre;
+
+   constexpr int size = 10;
+
+   using Vector = Containers::Vector< double, HYPRE_Device >;
+   Vector v( size, 2 );
+
+   Containers::HypreVector a;
+   a.setSize( size );
+   EXPECT_TRUE( a.getData() );
+   EXPECT_EQ( a.getSize(), size );
+   a.setValue( 2 );
+   EXPECT_EQ( a.getConstView(), v );
+   EXPECT_EQ( a.getView(), v );
+
+   Containers::HypreVector b;
+   b.bind( a );
+   EXPECT_EQ( b.getData(), a.getData() );
+   EXPECT_EQ( b.getSize(), a.getSize() );
+
+   a.bind( v );
+   EXPECT_EQ( a.getData(), v.getData() );
+   EXPECT_EQ( a.getSize(), v.getSize() );
+
+   // test move-constructor and move-assignment
+   Containers::HypreVector c = std::move( b );
+   b = std::move( c );
+}
+
+TEST( HypreTest, AssumedPartitionCheck )
+{
+   // this should always return 1:
+   // https://github.com/hypre-space/hypre/blob/master/src/utilities/ap.c
+   ASSERT_EQ( HYPRE_AssumedPartitionCheck(), 1 );
+}
+
+template< typename DistributedArray >
+auto getDistributedArray( MPI_Comm communicator,
+                          typename DistributedArray::IndexType globalSize,
+                          typename DistributedArray::IndexType ghosts )
+{
+   DistributedArray array;
+
+   using LocalRangeType = typename DistributedArray::LocalRangeType;
+   const LocalRangeType localRange = Containers::Partitioner< typename DistributedArray::IndexType >::splitRange( globalSize, communicator );
+   array.setDistribution( localRange, ghosts, globalSize, communicator );
+
+   using Synchronizer = typename Containers::Partitioner< typename DistributedArray::IndexType >::template ArraySynchronizer< typename DistributedArray::DeviceType >;
+   array.setSynchronizer( std::make_shared<Synchronizer>( localRange, ghosts / 2, communicator ) );
+
+   return array;
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, ParVector )
+{
+   TNL::Hypre hypre;
+
+   const MPI_Comm communicator = MPI_COMM_WORLD;
+   const int globalSize = 97;  // prime number to force non-uniform distribution
+   // some arbitrary even value (but must be 0 if not distributed)
+   const int ghosts = (TNL::MPI::GetSize(communicator) > 1) ? 4 : 0;
+
+   using DistributedVector = Containers::DistributedVector< HYPRE_Real, HYPRE_Device, HYPRE_BigInt >;
+   auto v = getDistributedArray< DistributedVector >( communicator, globalSize, ghosts );
+   v.setValue( 2 );
+
+   auto a = getDistributedArray< Containers::HypreParVector >( communicator, globalSize, ghosts );
+   EXPECT_EQ( a.getSize(), v.getSize() );
+   EXPECT_EQ( a.getLocalRange(), v.getLocalRange() );
+   a.setValue( 2 );
+   EXPECT_EQ( a.getConstView(), v );
+   EXPECT_EQ( a.getView(), v );
+
+   Containers::HypreParVector b;
+   b.bind( v );
+   EXPECT_EQ( b.getSize(), v.getSize() );
+   EXPECT_EQ( b.getLocalView().getData(), v.getLocalView().getData() );
+   EXPECT_EQ( b.getLocalView(), v.getLocalView() );
+
+   // test move-constructor and move-assignment
+   Containers::HypreParVector c = std::move( b );
+   b = std::move( c );
+}
+
+/***
+ * Creates the following matrix (dots represent zero matrix elements):
+ *
+ *   /  2.5 -1    .    .    .   \
+ *   | -1    2.5 -1    .    .   |
+ *   |  .   -1    2.5 -1.   .   |
+ *   |  .    .   -1    2.5 -1   |
+ *   \  .    .    .   -1    2.5 /
+ */
+template< typename MatrixType >
+MatrixType getGlobalMatrix( int size )
+{
+   using Vector = Containers::Vector< typename MatrixType::RealType, typename MatrixType::DeviceType, typename MatrixType::IndexType >;
+   MatrixType matrix;
+   matrix.setDimensions( size, size );
+   Vector capacities( size, 3 );
+   capacities.setElement( 0, 2 );
+   capacities.setElement( size - 1, 2 );
+   matrix.setRowCapacities( capacities );
+
+   auto f = [=] __cuda_callable__ ( typename MatrixType::ViewType::RowView& row ) mutable {
+      const int rowIdx = row.getRowIndex();
+      if( rowIdx == 0 )
+      {
+         row.setElement( 0, rowIdx,    2.5 );    // diagonal element
+         row.setElement( 1, rowIdx+1, -1 );      // element above the diagonal
+      }
+      else if( rowIdx == size - 1 )
+      {
+         row.setElement( 0, rowIdx-1, -1.0 );    // element below the diagonal
+         row.setElement( 1, rowIdx,    2.5 );    // diagonal element
+      }
+      else
+      {
+         row.setElement( 0, rowIdx-1, -1.0 );    // element below the diagonal
+         row.setElement( 1, rowIdx,    2.5 );    // diagonal element
+         row.setElement( 2, rowIdx+1, -1.0 );    // element above the diagonal
+      }
+   };
+   matrix.getView().forAllRows( f );
+
+   return matrix;
+}
+
+// returns only a local block of the global matrix created by getGlobalMatrix
+template< typename MatrixType >
+MatrixType getLocalBlock( typename MatrixType::IndexType global_size,
+                          Containers::Subrange< typename MatrixType::IndexType > local_row_range )
+{
+   using Vector = Containers::Vector< typename MatrixType::RealType, typename MatrixType::DeviceType, typename MatrixType::IndexType >;
+   MatrixType matrix;
+   matrix.setDimensions( local_row_range.getSize(), global_size );
+   Vector capacities( local_row_range.getSize(), 3 );
+   if( local_row_range.getBegin() == 0 )
+      capacities.setElement( 0, 2 );
+   if( local_row_range.getEnd() == global_size )
+      capacities.setElement( local_row_range.getSize() - 1, 2 );
+   matrix.setRowCapacities( capacities );
+
+   auto f = [=] __cuda_callable__ ( typename MatrixType::ViewType::RowView& row ) mutable {
+      const int rowIdx = row.getRowIndex();
+      const int colIdx = local_row_range.getBegin() + rowIdx;
+      if( colIdx == 0 )
+      {
+         row.setElement( 0, colIdx,    2.5 );    // diagonal element
+         row.setElement( 1, colIdx+1, -1 );      // element above the diagonal
+      }
+      else if( colIdx == global_size - 1 )
+      {
+         row.setElement( 0, colIdx-1, -1.0 );    // element below the diagonal
+         row.setElement( 1, colIdx,    2.5 );    // diagonal element
+      }
+      else
+      {
+         row.setElement( 0, colIdx-1, -1.0 );    // element below the diagonal
+         row.setElement( 1, colIdx,    2.5 );    // diagonal element
+         row.setElement( 2, colIdx+1, -1.0 );    // element above the diagonal
+      }
+   };
+   matrix.getView().forAllRows( f );
+
+   return matrix;
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, CSRMatrix )
+{
+   TNL::Hypre hypre;
+
+   constexpr int size = 97;
+
+   using MatrixType = Matrices::SparseMatrix< double, HYPRE_Device >;
+   MatrixType matrix = getGlobalMatrix< MatrixType >( size );
+
+   Matrices::HypreCSRMatrix a = getGlobalMatrix< Matrices::HypreCSRMatrix >( size );
+   EXPECT_GT( a.getValues().getSize(), 0 );
+   EXPECT_GT( a.getColumnIndexes().getSize(), 0 );
+   EXPECT_GT( a.getRowOffsets().getSize(), 0 );
+   EXPECT_EQ( a.getRows(), size );
+   EXPECT_EQ( a.getColumns(), size );
+   EXPECT_EQ( a.getConstView(), matrix );
+   EXPECT_EQ( a.getView(), matrix );
+
+   Matrices::HypreCSRMatrix b;
+   b.bind( matrix );
+   EXPECT_EQ( b.getRows(), size );
+   EXPECT_EQ( b.getColumns(), size );
+   EXPECT_EQ( b.getConstView(), matrix );
+   EXPECT_EQ( b.getView(), matrix );
+
+   // test move-constructor and move-assignment
+   Matrices::HypreCSRMatrix c = std::move( b );
+   b = std::move( c );
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, ParCSRMatrix_wrapCSRMatrix )
+{
+   TNL::Hypre hypre;
+
+   constexpr int size = 97;
+   Matrices::HypreCSRMatrix matrix = getGlobalMatrix< Matrices::HypreCSRMatrix >( size );
+
+   auto a = Matrices::HypreParCSRMatrix::wrapCSRMatrix( matrix );
+   EXPECT_EQ( a.getRows(), size );
+   EXPECT_EQ( a.getColumns(), size );
+   EXPECT_EQ( a.getLocalRowRange(), Containers::Subrange< HYPRE_BigInt >( 0, size ) );
+   EXPECT_EQ( a.getLocalColumnRange(), Containers::Subrange< HYPRE_BigInt >( 0, size ) );
+   EXPECT_EQ( a.getNonzeroElementsCount(), a.getDiagonalBlock().getNonzeroElementsCount() );
+
+   Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock();
+   // the matrices are not elementwise-equal, ParCSR is reordered such that the
+   // diagonal elements are first in the rows
+//   EXPECT_EQ( a_diag.getValues(), matrix.getValues() );
+//   EXPECT_EQ( a_diag.getColumnIndexes(), matrix.getColumnIndexes() );
+//   EXPECT_EQ( a_diag.getRowOffsets(), matrix.getRowOffsets() );
+   EXPECT_EQ( a_diag.getValues().getSize(), matrix.getValues().getSize() );
+   EXPECT_EQ( a_diag.getColumnIndexes().getSize(), matrix.getColumnIndexes().getSize() );
+   EXPECT_EQ( a_diag.getRowOffsets().getSize(), matrix.getRowOffsets().getSize() );
+   EXPECT_EQ( a_diag.getRows(), size );
+   EXPECT_EQ( a_diag.getColumns(), size );
+//   EXPECT_EQ( a_diag.getConstView(), matrix );
+//   EXPECT_EQ( a_diag.getView(), matrix );
+
+   Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock();
+   EXPECT_EQ( a_offd.getValues().getSize(), 0 );
+   EXPECT_EQ( a_offd.getColumnIndexes().getSize(), 0 );
+   EXPECT_EQ( a_offd.getRowOffsets().getSize(), 0 );
+   EXPECT_EQ( a_offd.getRows(), size );
+   EXPECT_EQ( a_offd.getColumns(), 0 );
+
+   // test move-constructor and move-assignment
+   Matrices::HypreParCSRMatrix c = std::move( a );
+   a = std::move( c );
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, ParCSRMatrix_fromMasterRank )
+{
+   TNL::Hypre hypre;
+
+   const int rank = TNL::MPI::GetRank( MPI_COMM_WORLD );
+   const int nproc = TNL::MPI::GetSize( MPI_COMM_WORLD );
+
+   constexpr int global_size = 97;
+   Matrices::HypreCSRMatrix matrix = getGlobalMatrix< Matrices::HypreCSRMatrix >( global_size );
+
+   using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >;
+   auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+   Containers::HypreParVector hypre_x;
+   hypre_x.bind( x );
+
+   auto a = Matrices::HypreParCSRMatrix::fromMasterRank( matrix, hypre_x, hypre_x );
+   EXPECT_EQ( a.getRows(), global_size );
+   EXPECT_EQ( a.getColumns(), global_size );
+   EXPECT_EQ( a.getLocalRowRange(), x.getLocalRange() );
+   EXPECT_EQ( a.getLocalColumnRange(), x.getLocalRange() );
+
+   Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock();
+   const int local_size = x.getLocalRange().getSize();
+   EXPECT_EQ( a_diag.getRowOffsets().getSize(), local_size + 1 );
+   EXPECT_EQ( a_diag.getRows(), local_size );
+   EXPECT_EQ( a_diag.getColumns(), local_size );
+
+   Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock();
+   EXPECT_EQ( a_offd.getRowOffsets().getSize(), local_size + 1 );
+   EXPECT_EQ( a_offd.getRows(), local_size );
+   const int offd_cols = ( nproc == 1 ) ? 0 : ( rank > 0 && rank < nproc - 1 ) ? 2 : 1;
+   EXPECT_EQ( a_offd.getColumns(), offd_cols );
+
+   auto col_map_offd = a.getOffdiagonalColumnsMapping();
+   EXPECT_EQ( col_map_offd.getSize(), offd_cols );
+
+   // test move-constructor and move-assignment
+   Matrices::HypreParCSRMatrix c = std::move( a );
+   a = std::move( c );
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+TEST( HypreTest, ParCSRMatrix_fromLocalBlocks )
+{
+   TNL::Hypre hypre;
+
+   const int rank = TNL::MPI::GetRank( MPI_COMM_WORLD );
+   const int nproc = TNL::MPI::GetSize( MPI_COMM_WORLD );
+
+   constexpr int global_size = 97;
+
+   using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >;
+   auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+
+   Matrices::HypreCSRMatrix matrix = getLocalBlock< Matrices::HypreCSRMatrix >( global_size, x.getLocalRange() );
+
+   auto a = Matrices::HypreParCSRMatrix::fromLocalBlocks( MPI_COMM_WORLD, global_size, global_size, x.getLocalRange(), x.getLocalRange(), matrix );
+   EXPECT_EQ( a.getRows(), global_size );
+   EXPECT_EQ( a.getColumns(), global_size );
+   EXPECT_EQ( a.getLocalRowRange(), x.getLocalRange() );
+   EXPECT_EQ( a.getLocalColumnRange(), x.getLocalRange() );
+
+   Matrices::HypreCSRMatrix a_diag = a.getDiagonalBlock();
+   const int local_size = x.getLocalRange().getSize();
+   EXPECT_EQ( a_diag.getRowOffsets().getSize(), local_size + 1 );
+   EXPECT_EQ( a_diag.getRows(), local_size );
+   EXPECT_EQ( a_diag.getColumns(), local_size );
+
+   Matrices::HypreCSRMatrix a_offd = a.getOffdiagonalBlock();
+   EXPECT_EQ( a_offd.getRowOffsets().getSize(), local_size + 1 );
+   EXPECT_EQ( a_offd.getRows(), local_size );
+   const int offd_cols = ( nproc == 1 ) ? 0 : ( rank > 0 && rank < nproc - 1 ) ? 2 : 1;
+   EXPECT_EQ( a_offd.getColumns(), offd_cols );
+
+   auto col_map_offd = a.getOffdiagonalColumnsMapping();
+   EXPECT_EQ( col_map_offd.getSize(), offd_cols );
+
+   Matrices::HypreCSRMatrix a_local = a.getMergedLocalMatrix();
+   EXPECT_EQ( a_local.getRows(), matrix.getRows() );
+   EXPECT_EQ( a_local.getColumns(), matrix.getColumns() );
+   EXPECT_EQ( a_local.getNonzeroElementsCount(), matrix.getNonzeroElementsCount() );
+   // TODO: the merged local matrix still has the diagonal element as the first entry per row
+   // and we can't use hypre_CSRMatrixReorder on the original block, because it is not square
+//   hypre_CSRMatrixReorder( matrix );
+//   EXPECT_EQ( a_local.getView(), matrix.getView() );
+
+   // test move-constructor and move-assignment
+   Matrices::HypreParCSRMatrix c = std::move( a );
+   a = std::move( c );
+}
+
+
+void solve( Matrices::HypreParCSRMatrix& A,
+            Containers::HypreParVector& x,
+            Containers::HypreParVector& b )
+{
+   // create the preconditioner
+   HypreDiagScale precond;
+//   HypreParaSails precond( A.getCommunicator() );
+//   HypreEuclid precond( A.getCommunicator() );
+//   HypreILU precond;
+//   HypreBoomerAMG precond;
+
+   // initialize the Hypre solver
+   HyprePCG solver( A.getCommunicator() );
+//   solver.setPrintLevel( 1 );
+   solver.setPreconditioner( precond );
+   solver.setMatrix( A );
+   solver.setTol( 1e-9 );
+   solver.setResidualConvergenceOptions( -1, 1e-9 );
+
+   // solve the linear system
+   solver.solve( b, x );
+}
+
+TEST( HypreTest, solve_seq )
+{
+   TNL::Hypre hypre;
+
+   constexpr int size = 97;
+
+   // create the global matrix
+   using MatrixType = Matrices::HypreCSRMatrix;
+   auto global_matrix = getGlobalMatrix< MatrixType >( size );
+
+   // set the dofs and right-hand-side vectors
+   using Vector = Containers::Vector< double, HYPRE_Device >;
+   Vector x( size, 1.0 );
+   Vector b( size );
+   global_matrix.getView().vectorProduct( x, b );
+   x = 0.0;
+
+   // bind parallel Hypre vectors
+   Containers::HypreParVector hypre_x;
+   Containers::HypreParVector hypre_b;
+   hypre_x.bind( {0, size}, 0, size, MPI_COMM_SELF, x.getView() );
+   hypre_b.bind( {0, size}, 0, size, MPI_COMM_SELF, b.getView() );
+
+   // convert the matrix to HypreParCSR
+   HYPRE_BigInt row_starts[ 2 ];
+   row_starts[ 0 ] = 0;
+   row_starts[ 1 ] = size;
+   auto matrix = Matrices::HypreParCSRMatrix::fromMasterRank( MPI_COMM_SELF, row_starts, row_starts, global_matrix );
+
+   // solve the linear system
+   solve( matrix, hypre_x, hypre_b );
+
+   // verify the solution
+   expect_near( x, 1, 1e-8 );
+}
+
+TEST( HypreTest, solve_distributed_fromMasterRank )
+{
+   TNL::Hypre hypre;
+
+   constexpr int global_size = 97;
+
+   // create the dofs and right-hand-side vectors
+   using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >;
+   auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+   auto b = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+
+   // bind parallel Hypre vectors
+   Containers::HypreParVector hypre_x;
+   Containers::HypreParVector hypre_b;
+   hypre_x.bind( x );
+   hypre_b.bind( b );
+
+   // create the global matrix
+   using MatrixType = Matrices::HypreCSRMatrix;
+   auto global_matrix = getGlobalMatrix< MatrixType >( global_size );
+
+   // distribute the matrix to Hypre format (rank 0 -> all ranks)
+   auto matrix = Matrices::HypreParCSRMatrix::fromMasterRank( global_matrix, hypre_x, hypre_b );
+
+   // set the right-hand-side
+   x.setValue( 1 );
+   HYPRE_ParCSRMatrixMatvec( 1.0, matrix, hypre_x, 0.0, hypre_b );
+   x.setValue( 0 );
+
+   // solve the linear system
+   solve( matrix, hypre_x, hypre_b );
+
+   // verify the solution
+   expect_near( x.getLocalView(), 1, 1e-8 );
+}
+
+TEST( HypreTest, solve_distributed_fromLocalBlock )
+{
+   TNL::Hypre hypre;
+
+   constexpr int global_size = 97;
+
+   // create the dofs and right-hand-side vectors
+   using DistributedVector = Containers::DistributedVector< double, HYPRE_Device >;
+   auto x = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+   auto b = getDistributedArray< DistributedVector >( MPI_COMM_WORLD, global_size, 2 );
+
+   // bind parallel Hypre vectors
+   Containers::HypreParVector hypre_x;
+   Containers::HypreParVector hypre_b;
+   hypre_x.bind( x );
+   hypre_b.bind( b );
+
+   // create the local matrix block
+   Matrices::HypreCSRMatrix local_matrix = getLocalBlock< Matrices::HypreCSRMatrix >( global_size, x.getLocalRange() );
+
+   // create the distributed matrix
+   auto matrix = Matrices::HypreParCSRMatrix::fromLocalBlocks( MPI_COMM_WORLD, global_size, global_size, x.getLocalRange(), x.getLocalRange(), local_matrix );
+
+   // set the right-hand-side
+   x.setValue( 1 );
+   HYPRE_ParCSRMatrixMatvec( 1.0, matrix, hypre_x, 0.0, hypre_b );
+   x.setValue( 0 );
+
+   // solve the linear system
+   solve( matrix, hypre_x, hypre_b );
+
+   // verify the solution
+   expect_near( x.getLocalView(), 1, 1e-8 );
+}
+
+#endif
+#endif
+
+#include "main_mpi.h"