Skip to content
Snippets Groups Projects
Commit 356abdcb authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Optimized ILUT

parent 163ac7f8
No related branches found
No related tags found
No related merge requests found
......@@ -13,6 +13,7 @@
#pragma once
#include <vector>
#include <set>
#include "ILUT.h"
#include "TriangularSolve.h"
......@@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer )
L.setDimensions( N, N );
U.setDimensions( N, N );
Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU;
// Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset;
timer_total.start();
// timer_total.start();
// compute row lengths
timer_rowlengths.start();
typename decltype(L)::CompressedRowLengthsVector L_rowLengths;
typename decltype(U)::CompressedRowLengthsVector U_rowLengths;
L_rowLengths.setSize( N );
U_rowLengths.setSize( N );
// timer_rowlengths.start();
typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N );
typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N );
for( IndexType i = 0; i < N; i++ ) {
const auto row = localMatrix.getRow( i );
const auto max_length = localMatrix.getRowLength( i );
......@@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer )
}
L.setCompressedRowLengths( L_rowLengths );
U.setCompressedRowLengths( U_rowLengths );
timer_rowlengths.stop();
// timer_rowlengths.stop();
// intermediate full vector for the i-th row of A
VectorType w;
......@@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer )
w.setValue( 0.0 );
// intermediate vectors for sorting and keeping only the largest values
// using Pair = std::pair< IndexType, RealType >;
struct Triplet {
IndexType column;
RealType value;
......@@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer )
auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; };
std::vector< Triplet > values_L, values_U;
// std::cout << "N = " << N << std::endl;
// Incomplete LU factorization with threshold
// (see Saad - Iterative methods for sparse linear systems, section 10.4)
for( IndexType i = 0; i < N; i++ ) {
......@@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer )
RealType A_i_norm = 0.0;
// set of indices where w_k is non-zero (i.e. {k: w_k != 0})
std::set< IndexType > w_k_set;
// copy A_i into the full vector w
timer_copy_into_w.start();
// timer_copy_into_w.start();
for( IndexType c_j = 0; c_j < max_length; c_j++ ) {
auto j = A_i.getElementColumn( c_j );
if( minColumn > 0 ) {
......@@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer )
// running computation of norm
A_i_norm += w[ j ] * w[ j ];
w_k_set.insert( j );
}
timer_copy_into_w.stop();
// timer_copy_into_w.stop();
// compute relative tolerance
A_i_norm = std::sqrt( A_i_norm );
const RealType tau_i = tau * A_i_norm;
// loop for k = 0, ..., i - 1; but only over the non-zero entries of w
timer_k_loop.start();
for( IndexType k = 0; k < i; k++ ) {
RealType w_k = w[ k ];
if( w_k == 0.0 )
continue;
// timer_k_loop.start();
for( const IndexType k : w_k_set ) {
if( k >= i )
break;
w_k /= localMatrix.getElementFast( k, k + minColumn );
RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn );
// apply dropping rule to w_k
if( std::abs( w_k ) < tau_i )
......@@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer )
// loop for j = 0, ..., N-1; but only over the non-zero entries
for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) {
const auto j = U_k.getElementColumn( c_j );
// skip dropped entries
if( j >= N ) break;
w[ j ] -= w_k * U_k.getElementValue( c_j );
// add non-zero to the w_k_set
w_k_set.insert( j );
}
}
}
timer_k_loop.stop();
// timer_k_loop.stop();
// apply dropping rule to the row w
// (we drop all values under threshold and keep nl(i) + p largest values in L
// and nu(i) + p largest values in U; see Saad (2003) for reference)
// TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary)
timer_dropping.start();
for( IndexType j = 0; j < N; j++ ) {
// construct heaps with the values in the L and U parts separately
// timer_heap_construct.start();
for( const IndexType j : w_k_set ) {
const RealType w_j_abs = std::abs( w[ j ] );
// ignore small values
if( w_j_abs < tau_i )
continue;
// push into the heaps for L or U
if( j < i ) {
if( j < i )
heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
}
else {
else
heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
}
}
std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// timer_heap_construct.stop();
// extract values for L and U
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) {
// timer_heap_extract.start();
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) {
// move the largest to the end
std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
// move the triplet from one vector into another
......@@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer )
heap_L.pop_back();
values_L.push_back( largest );
}
for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) {
// move the largest to the end
std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// move the triplet from one vector into another
......@@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer )
heap_U.pop_back();
values_U.push_back( largest );
}
// sort by column index to make it insertable into the sparse matrix
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
timer_dropping.stop();
// timer_heap_extract.stop();
// std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl;
timer_copy_into_LU.start();
// timer_copy_into_LU.start();
// sort by column index to make it insertable into the sparse matrix
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
// the row L_i might be empty
if( values_L.size() ) {
// L_ij = w_j for j = 0, ..., i - 1
auto L_i = L.getRow( i );
for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) {
const auto j = values_L[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
L_i.setElement( c_j, j, values_L[ c_j ].value );
}
}
// U_ij = w_j for j = i, ..., N - 1
auto U_i = U.getRow( N - 1 - i );
for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) {
for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) {
const auto j = values_U[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
U_i.setElement( c_j, j, values_U[ c_j ].value );
}
timer_copy_into_LU.stop();
// timer_copy_into_LU.stop();
// reset w
w.setValue( 0.0 );
// timer_reset.start();
for( const IndexType j : w_k_set )
w[ j ] = 0.0;
heap_L.clear();
heap_U.clear();
values_L.clear();
values_U.clear();
// timer_reset.stop();
}
timer_total.stop();
std::cout << "ILUT::update statistics:\n";
std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n";
std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n";
std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n";
std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n";
std::cout << std::flush;
// timer_total.stop();
// std::cout << "ILUT::update statistics:\n";
// std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n";
// std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n";
// std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n";
// std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n";
// std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n";
// std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n";
// std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n";
// std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n";
// std::cout << std::flush;
}
template< typename Matrix, typename Real, typename Index >
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment