@@ -81,16 +81,18 @@ The simplest preconditioning techniques are based on stationary methods such as
Another traditional class of generic preconditioners consists of incomplete factorization techniques, such as incomplete Cholesky (IC) and incomplete LU (ILU), which bring ideas developed for the direct solution of sparse linear systems to iterative methods.
There are multiple strategies to control the fill-in by selecting the sparsity pattern of the factors either statically based on the matrix entry positions or dynamically based on the matrix entry values \cite{saad:1994ilut,li:2003crout,bollhofer:2001,mittal:2003}.
Additionally, algebraic multilevel techniques originating from domain decomposition and multigrid methods were also applied to incomplete factorization methods \cite{saad:1999bilutm,botta:1999,henon:2006}, and algorithmic variants were developed to address parallelization and scalability issues \cite{karypis:1997,hysom:1999,hysom:2001,wang:2003,anzt:2018parilut,coleman:2018}.
Several incomplete factorization algorithms were also ported to GPU accelerators \cite{naumov:2011,naumov:2015}, though their efficiency remains limited compared to other preconditioning techniques.
Additionally, algebraic multilevel techniques originating from domain decomposition and multigrid methods were also applied to incomplete factorization methods \cite{saad:1999bilutm,botta:1999,henon:2006}, and algorithmic variants were developed to address parallelization and scalability issues \cite{karypis:1997,hysom:1999,hysom:2001,wang:2003,anzt:2018parilut,coleman:2018,chow:2015}.
Several incomplete factorization algorithms were also ported to GPU accelerators \cite{naumov:2011,naumov:2015}, though their efficiency remains limited compared to other preconditioning techniques because each preconditioning step requires two sparse triangular solves.
Multiple iterative or otherwise inexact techniques were proposed to address this issue \cite{anzt:2015,anzt:2016,ma:2021,shioya:2021}.
Finally, several approaches to incrementally update an existing factorization were developed \cite{calgaro:2010,anzt:2016b}.
Sparse approximate inverse preconditioners provide an alternative approach to remedy problems arising when incomplete factorizations are applied to indefinite systems or systems that are not diagonally dominant \cite{benzi:2002,saad:2003iterative}.
They also attracted attention thanks to their potential in parallel environments \cite{tang:1999}, including GPU accelerators \cite{dehnavi:2013,oyarzun:2014,bertaccini:2016,bernaschi:2019}.
They also attracted attention thanks to their potential in parallel environments \cite{grote:1997,tang:1999}, including GPU accelerators \cite{dehnavi:2013,oyarzun:2014,bertaccini:2016,bernaschi:2019}.
Approximate inverse preconditioners may be applied to symmetric as well as non-symmetric systems and they may be computed in a non-factored form where the preconditioner is expressed as a single matrix, or in a factored form where the preconditioner is expressed as a product of two or more matrices.
For both classes there exist multiple completely different approaches to compute the approximate inverse or approximate inverse factors; the two main approaches are Frobenius norm minimization \cite{chow:1994,chow:2000,kolotilina:1993,kolotilina:1995,kolotilina:2000,kolotilina:1999} and incomplete bi-conjugation \cite{benzi:1996,benzi:1998,bridson:2000,bertaccini:2016}.
The sparse approximate inverse algorithms based on bi-conjugation show algebraic behavior that is similar to that of incomplete LU factorizations \cite{bollhofer:2002a,bollhofer:2002b}.
When matrix values are changed but the sparsity pattern remains the same, it is possible to reuse an existing approximate inverse preconditioner to compute the updated approximate inverse more efficiently \cite{kolotilina:2000,bridson:2000}.
Several dynamic pattern selection strategies and multilevel techniques for sparse approximate inverse preconditioners have been recently investigated \cite{janna:2011,janna:2015,bernaschi:2019,franceschini:2018}.
Several dynamic pattern selection strategies and multilevel techniques for sparse approximate inverse preconditioners have been recently investigated \cite{janna:2011,janna:2015,bernaschi:2019,franceschini:2018,kopal:2017}.
Polynomial preconditioning is closely related to the development of Krylov methods.
It utilizes spectral information of the linear system matrix and is favorable for parallel architectures where the sparse matrix--vector multiplication delivers high performance.
abstract={This paper presents a new fine-grained parallel algorithm for computing an incomplete LU factorization. All nonzeros in the incomplete factors can be computed in parallel and asynchronously, using one or more sweeps that iteratively improve the accuracy of the factorization. Unlike existing parallel algorithms, the amount of parallelism is large irrespective of the ordering of the matrix, and matrix ordering can be used to enhance the accuracy of the factorization rather than to increase parallelism. Numerical tests show that very few sweeps are needed to construct a factorization that is an effective preconditioner.},
doi={10.1137/140968896},
publisher={SIAM},
}
@TechReport{naumov:2011,
title={Incomplete-{LU} and {Cholesky} preconditioned iterative methods using {CUSPARSE} and {CUBLAS}},
author={Naumov, Maxim},
@@ -595,6 +608,78 @@
keywords={manual},
}
@InProceedings{anzt:2015,
author={Anzt, Hartwig and Chow, Edmond and Dongarra, Jack},
booktitle={European Conference on Parallel Processing},
title={Iterative sparse triangular solves for preconditioning},
year={2015},
editor={Träff, Jesper Larsson and Hunold, Sascha and Versaci, Francesco},
pages={650--661},
publisher={Springer},
series={Lecture Notes in Computer Science},
volume={9233},
doi={10.1007/978-3-662-48096-0_50},
}
@InCollection{anzt:2016,
author={Anzt, Hartwig and Chow, Edmond and Szyld, Daniel B. and Dongarra, Jack},
booktitle={Software for Exascale Computing -- {SPPEXA} 2013--2015},
publisher={Springer},
title={Domain overlap for iterative sparse triangular solves on {GPUs}},
year={2016},
editor={Bungartz, Hans-Joachim and Neumann, Philipp and Nagel, Wolfgang E.},
pages={527--545},
series={Lecture Notes in Computational Science and Engineering},
volume={113},
doi={10.1007/978-3-319-40528-5_24},
}
@Article{ma:2021,
  author    = {Ma, Wenpeng and Hu, Yiwen and Yuan, Wu and Liu, Xiazhen},
  title     = {Developing a multi-{GPU}-enabled preconditioned {GMRES} with inexact triangular solves for block sparse matrices},
  journal   = {Mathematical Problems in Engineering},
  volume    = {2021},
  year      = {2021},
  publisher = {Hindawi},
  doi       = {10.1155/2021/6804723},
}
@Article{shioya:2021,
author={Shioya, Akemi and Yamamoto, Yusaku},
journal={Parallel Computing},
title={Block red--black {MILU(0)} preconditioner with relaxation on {GPU}},
year={2021},
issn={0167-8191},
pages={1--13},
volume={103},
doi={10.1016/j.parco.2021.102760},
publisher={Elsevier},
}
@Article{calgaro:2010,
author={Calgaro, Caterina and Chehab, Jean-Paul and Saad, Yousef},
journal={Numerical Linear Algebra with Applications},
title={Incremental incomplete {LU} factorizations with applications},
year={2010},
number={5},
pages={811--837},
volume={17},
doi={10.1002/nla.756},
publisher={Wiley},
}
@Article{anzt:2016b,
  author    = {Anzt, Hartwig and Chow, Edmond and Saak, Jens and Dongarra, Jack},
  title     = {Updating incomplete factorization preconditioners for model order reduction},
  journal   = {Numerical Algorithms},
  volume    = {73},
  number    = {3},
  pages     = {611--630},
  year      = {2016},
  publisher = {Springer},
  issn      = {1572-9265},
  doi       = {10.1007/s11075-016-0110-2},
  abstract  = {When solving a sequence of related linear systems by iterative methods, it is common to reuse the preconditioner for several systems, and then to recompute the preconditioner when the matrix has changed significantly. Rather than recomputing the preconditioner from scratch, it is potentially more efficient to update the previous preconditioner. Unfortunately, it is not always known how to update a preconditioner, for example, when the preconditioner is an incomplete factorization. A recently proposed iterative algorithm for computing incomplete factorizations, however, is able to exploit an initial guess, unlike existing algorithms for incomplete factorizations. By treating a previous factorization as an initial guess to this algorithm, an incomplete factorization may thus be updated. We use a sequence of problems from model order reduction. Experimental results using an optimized GPU implementation show that updating a previous factorization can be inexpensive and effective, making solving sequences of linear systems a potential niche problem for the iterative incomplete factorization algorithm.},
}
@InProceedings{chow:1994,
author={Chow, Edmond and Saad, Yousef},
booktitle={Colorado conference on iterative methods},
@@ -641,6 +726,21 @@
publisher={SIAM},
}
@Article{grote:1997,
author={Grote, Marcus J. and Huckle, Thomas},
journal={SIAM Journal on Scientific Computing},
title={Parallel preconditioning with sparse approximate inverses},
year={1997},
issn={1064-8275},
month=may,
number={3},
pages={838--853},
volume={18},
abstract={A parallel preconditioner is presented for the solution of general sparse linear systems of equations. A sparse approximate inverse is computed explicitly and then applied as a preconditioner to an iterative method. The computation of the preconditioner is inherently parallel, and its application only requires a matrix-vector product. The sparsity pattern of the approximate inverse is not imposed a priori but captured automatically. This keeps the amount of work and the number of nonzero entries in the preconditioner to a minimum. Rigorous bounds on the clustering of the eigenvalues and the singular values are derived for the preconditioned system, and the proximity of the approximate to the true inverse is estimated. An extensive set of test problems from scientific and industrial applications provides convincing evidence of the effectiveness of this approach.},
doi={10.1137/S1064827594276552},
publisher={SIAM},
}
@Article{tang:1999,
author={Tang, Wei-Pai},
journal={SIAM Journal on Matrix Analysis and Applications},
@@ -824,6 +924,18 @@
publisher={SIAM},
}
@Article{kopal:2017,
author={Jiří Kopal and Miroslav Rozložník and Miroslav Tůma},