Skip to content
Snippets Groups Projects

Distributed linear solvers

Merged Jakub Klinkovský requested to merge cineca/mpi into develop
1 file
+ 62
52
Compare changes
  • Side-by-side
  • Inline
@@ -13,6 +13,7 @@
#pragma once
#include <vector>
#include <set>
#include "ILUT.h"
#include "TriangularSolve.h"
@@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer )
L.setDimensions( N, N );
U.setDimensions( N, N );
Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU;
// Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset;
timer_total.start();
// timer_total.start();
// compute row lengths
timer_rowlengths.start();
typename decltype(L)::CompressedRowLengthsVector L_rowLengths;
typename decltype(U)::CompressedRowLengthsVector U_rowLengths;
L_rowLengths.setSize( N );
U_rowLengths.setSize( N );
// timer_rowlengths.start();
typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N );
typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N );
for( IndexType i = 0; i < N; i++ ) {
const auto row = localMatrix.getRow( i );
const auto max_length = localMatrix.getRowLength( i );
@@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer )
}
L.setCompressedRowLengths( L_rowLengths );
U.setCompressedRowLengths( U_rowLengths );
timer_rowlengths.stop();
// timer_rowlengths.stop();
// intermediate full vector for the i-th row of A
VectorType w;
@@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer )
w.setValue( 0.0 );
// intermediate vectors for sorting and keeping only the largest values
// using Pair = std::pair< IndexType, RealType >;
struct Triplet {
IndexType column;
RealType value;
@@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer )
auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; };
std::vector< Triplet > values_L, values_U;
// std::cout << "N = " << N << std::endl;
// Incomplete LU factorization with threshold
// (see Saad - Iterative methods for sparse linear systems, section 10.4)
for( IndexType i = 0; i < N; i++ ) {
@@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer )
RealType A_i_norm = 0.0;
// set of indices where w_k is non-zero (i.e. {k: w_k != 0})
std::set< IndexType > w_k_set;
// copy A_i into the full vector w
timer_copy_into_w.start();
// timer_copy_into_w.start();
for( IndexType c_j = 0; c_j < max_length; c_j++ ) {
auto j = A_i.getElementColumn( c_j );
if( minColumn > 0 ) {
@@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer )
// running computation of norm
A_i_norm += w[ j ] * w[ j ];
w_k_set.insert( j );
}
timer_copy_into_w.stop();
// timer_copy_into_w.stop();
// compute relative tolerance
A_i_norm = std::sqrt( A_i_norm );
const RealType tau_i = tau * A_i_norm;
// loop for k = 0, ..., i - 1; but only over the non-zero entries of w
timer_k_loop.start();
for( IndexType k = 0; k < i; k++ ) {
RealType w_k = w[ k ];
if( w_k == 0.0 )
continue;
// timer_k_loop.start();
for( const IndexType k : w_k_set ) {
if( k >= i )
break;
w_k /= localMatrix.getElementFast( k, k + minColumn );
RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn );
// apply dropping rule to w_k
if( std::abs( w_k ) < tau_i )
@@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer )
// loop for j = 0, ..., N-1; but only over the non-zero entries
for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) {
const auto j = U_k.getElementColumn( c_j );
// skip dropped entries
if( j >= N ) break;
w[ j ] -= w_k * U_k.getElementValue( c_j );
// add non-zero to the w_k_set
w_k_set.insert( j );
}
}
}
timer_k_loop.stop();
// timer_k_loop.stop();
// apply dropping rule to the row w
// (we drop all values under threshold and keep nl(i) + p largest values in L
// and nu(i) + p largest values in U; see Saad (2003) for reference)
// TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary)
timer_dropping.start();
for( IndexType j = 0; j < N; j++ ) {
// construct heaps with the values in the L and U parts separately
// timer_heap_construct.start();
for( const IndexType j : w_k_set ) {
const RealType w_j_abs = std::abs( w[ j ] );
// ignore small values
if( w_j_abs < tau_i )
continue;
// push into the heaps for L or U
if( j < i ) {
if( j < i )
heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
}
else {
else
heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
}
}
std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// timer_heap_construct.stop();
// extract values for L and U
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) {
// timer_heap_extract.start();
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) {
// move the largest to the end
std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
// move the triplet from one vector into another
@@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer )
heap_L.pop_back();
values_L.push_back( largest );
}
for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) {
// move the largest to the end
std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// move the triplet from one vector into another
@@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer )
heap_U.pop_back();
values_U.push_back( largest );
}
// sort by column index to make it insertable into the sparse matrix
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
timer_dropping.stop();
// timer_heap_extract.stop();
// std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl;
timer_copy_into_LU.start();
// timer_copy_into_LU.start();
// sort by column index to make it insertable into the sparse matrix
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
// the row L_i might be empty
if( values_L.size() ) {
// L_ij = w_j for j = 0, ..., i - 1
auto L_i = L.getRow( i );
for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) {
const auto j = values_L[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
L_i.setElement( c_j, j, values_L[ c_j ].value );
}
}
// U_ij = w_j for j = i, ..., N - 1
auto U_i = U.getRow( N - 1 - i );
for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) {
const auto j = values_U[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
U_i.setElement( c_j, j, values_U[ c_j ].value );
}
timer_copy_into_LU.stop();
// timer_copy_into_LU.stop();
// reset w
w.setValue( 0.0 );
// timer_reset.start();
for( const IndexType j : w_k_set )
w[ j ] = 0.0;
heap_L.clear();
heap_U.clear();
values_L.clear();
values_U.clear();
// timer_reset.stop();
}
timer_total.stop();
std::cout << "ILUT::update statistics:\n";
std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n";
std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n";
std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n";
std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n";
std::cout << std::flush;
// timer_total.stop();
// std::cout << "ILUT::update statistics:\n";
// std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n";
// std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n";
// std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n";
// std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n";
// std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n";
// std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n";
// std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n";
// std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n";
// std::cout << std::flush;
}
template< typename Matrix, typename Real, typename Index >
Loading