Skip to content
Snippets Groups Projects
Commit 356abdcb authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Optimized ILUT

parent 163ac7f8
No related branches found
No related tags found
No related merge requests found
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#pragma once #pragma once
#include <vector> #include <vector>
#include <set>
#include "ILUT.h" #include "ILUT.h"
#include "TriangularSolve.h" #include "TriangularSolve.h"
...@@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer ) ...@@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer )
L.setDimensions( N, N ); L.setDimensions( N, N );
U.setDimensions( N, N ); U.setDimensions( N, N );
Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU; // Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset;
timer_total.start(); // timer_total.start();
// compute row lengths // compute row lengths
timer_rowlengths.start(); // timer_rowlengths.start();
typename decltype(L)::CompressedRowLengthsVector L_rowLengths; typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N );
typename decltype(U)::CompressedRowLengthsVector U_rowLengths; typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N );
L_rowLengths.setSize( N );
U_rowLengths.setSize( N );
for( IndexType i = 0; i < N; i++ ) { for( IndexType i = 0; i < N; i++ ) {
const auto row = localMatrix.getRow( i ); const auto row = localMatrix.getRow( i );
const auto max_length = localMatrix.getRowLength( i ); const auto max_length = localMatrix.getRowLength( i );
...@@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer ) ...@@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer )
} }
L.setCompressedRowLengths( L_rowLengths ); L.setCompressedRowLengths( L_rowLengths );
U.setCompressedRowLengths( U_rowLengths ); U.setCompressedRowLengths( U_rowLengths );
timer_rowlengths.stop(); // timer_rowlengths.stop();
// intermediate full vector for the i-th row of A // intermediate full vector for the i-th row of A
VectorType w; VectorType w;
...@@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer ) ...@@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer )
w.setValue( 0.0 ); w.setValue( 0.0 );
// intermediate vectors for sorting and keeping only the largest values // intermediate vectors for sorting and keeping only the largest values
// using Pair = std::pair< IndexType, RealType >;
struct Triplet { struct Triplet {
IndexType column; IndexType column;
RealType value; RealType value;
...@@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer ) ...@@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer )
auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; }; auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; };
std::vector< Triplet > values_L, values_U; std::vector< Triplet > values_L, values_U;
// std::cout << "N = " << N << std::endl;
// Incomplete LU factorization with threshold // Incomplete LU factorization with threshold
// (see Saad - Iterative methods for sparse linear systems, section 10.4) // (see Saad - Iterative methods for sparse linear systems, section 10.4)
for( IndexType i = 0; i < N; i++ ) { for( IndexType i = 0; i < N; i++ ) {
...@@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer ) ...@@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer )
RealType A_i_norm = 0.0; RealType A_i_norm = 0.0;
// set of indices where w_k is non-zero (i.e. {k: w_k != 0})
std::set< IndexType > w_k_set;
// copy A_i into the full vector w // copy A_i into the full vector w
timer_copy_into_w.start(); // timer_copy_into_w.start();
for( IndexType c_j = 0; c_j < max_length; c_j++ ) { for( IndexType c_j = 0; c_j < max_length; c_j++ ) {
auto j = A_i.getElementColumn( c_j ); auto j = A_i.getElementColumn( c_j );
if( minColumn > 0 ) { if( minColumn > 0 ) {
...@@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer ) ...@@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer )
// running computation of norm // running computation of norm
A_i_norm += w[ j ] * w[ j ]; A_i_norm += w[ j ] * w[ j ];
w_k_set.insert( j );
} }
timer_copy_into_w.stop(); // timer_copy_into_w.stop();
// compute relative tolerance // compute relative tolerance
A_i_norm = std::sqrt( A_i_norm ); A_i_norm = std::sqrt( A_i_norm );
const RealType tau_i = tau * A_i_norm; const RealType tau_i = tau * A_i_norm;
// loop for k = 0, ..., i - 1; but only over the non-zero entries of w // loop for k = 0, ..., i - 1; but only over the non-zero entries of w
timer_k_loop.start(); // timer_k_loop.start();
for( IndexType k = 0; k < i; k++ ) { for( const IndexType k : w_k_set ) {
RealType w_k = w[ k ]; if( k >= i )
if( w_k == 0.0 ) break;
continue;
w_k /= localMatrix.getElementFast( k, k + minColumn ); RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn );
// apply dropping rule to w_k // apply dropping rule to w_k
if( std::abs( w_k ) < tau_i ) if( std::abs( w_k ) < tau_i )
...@@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer ) ...@@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer )
// loop for j = 0, ..., N-1; but only over the non-zero entries // loop for j = 0, ..., N-1; but only over the non-zero entries
for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) { for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) {
const auto j = U_k.getElementColumn( c_j ); const auto j = U_k.getElementColumn( c_j );
// skip dropped entries // skip dropped entries
if( j >= N ) break; if( j >= N ) break;
w[ j ] -= w_k * U_k.getElementValue( c_j ); w[ j ] -= w_k * U_k.getElementValue( c_j );
// add non-zero to the w_k_set
w_k_set.insert( j );
} }
} }
} }
timer_k_loop.stop(); // timer_k_loop.stop();
// apply dropping rule to the row w // apply dropping rule to the row w
// (we drop all values under threshold and keep nl(i) + p largest values in L // (we drop all values under threshold and keep nl(i) + p largest values in L
// and nu(i) + p largest values in U; see Saad (2003) for reference) // and nu(i) + p largest values in U; see Saad (2003) for reference)
// TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary)
timer_dropping.start(); // construct heaps with the values in the L and U parts separately
for( IndexType j = 0; j < N; j++ ) { // timer_heap_construct.start();
for( const IndexType j : w_k_set ) {
const RealType w_j_abs = std::abs( w[ j ] ); const RealType w_j_abs = std::abs( w[ j ] );
// ignore small values // ignore small values
if( w_j_abs < tau_i ) if( w_j_abs < tau_i )
continue; continue;
// push into the heaps for L or U // push into the heaps for L or U
if( j < i ) { if( j < i )
heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) ); heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); else
}
else {
heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) ); heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) );
std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
}
} }
std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// timer_heap_construct.stop();
// extract values for L and U // extract values for L and U
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) { // timer_heap_extract.start();
for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) {
// move the largest to the end // move the largest to the end
std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
// move the triplet from one vector into another // move the triplet from one vector into another
...@@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer ) ...@@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer )
heap_L.pop_back(); heap_L.pop_back();
values_L.push_back( largest ); values_L.push_back( largest );
} }
for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) {
// move the largest to the end // move the largest to the end
std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
// move the triplet from one vector into another // move the triplet from one vector into another
...@@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer ) ...@@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer )
heap_U.pop_back(); heap_U.pop_back();
values_U.push_back( largest ); values_U.push_back( largest );
} }
// sort by column index to make it insertable into the sparse matrix // timer_heap_extract.stop();
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
timer_dropping.stop();
// std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl; // std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl;
timer_copy_into_LU.start(); // timer_copy_into_LU.start();
// sort by column index to make it insertable into the sparse matrix
std::sort( values_L.begin(), values_L.end(), cmp_column );
std::sort( values_U.begin(), values_U.end(), cmp_column );
// the row L_i might be empty // the row L_i might be empty
if( values_L.size() ) { if( values_L.size() ) {
// L_ij = w_j for j = 0, ..., i - 1 // L_ij = w_j for j = 0, ..., i - 1
auto L_i = L.getRow( i ); auto L_i = L.getRow( i );
for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) {
const auto j = values_L[ c_j ].column; const auto j = values_L[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
L_i.setElement( c_j, j, values_L[ c_j ].value ); L_i.setElement( c_j, j, values_L[ c_j ].value );
} }
} }
// U_ij = w_j for j = i, ..., N - 1 // U_ij = w_j for j = i, ..., N - 1
auto U_i = U.getRow( N - 1 - i ); auto U_i = U.getRow( N - 1 - i );
for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) {
const auto j = values_U[ c_j ].column; const auto j = values_U[ c_j ].column;
// std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
U_i.setElement( c_j, j, values_U[ c_j ].value ); U_i.setElement( c_j, j, values_U[ c_j ].value );
} }
timer_copy_into_LU.stop(); // timer_copy_into_LU.stop();
// reset w // reset w
w.setValue( 0.0 ); // timer_reset.start();
for( const IndexType j : w_k_set )
w[ j ] = 0.0;
heap_L.clear(); heap_L.clear();
heap_U.clear(); heap_U.clear();
values_L.clear(); values_L.clear();
values_U.clear(); values_U.clear();
// timer_reset.stop();
} }
timer_total.stop(); // timer_total.stop();
std::cout << "ILUT::update statistics:\n"; // std::cout << "ILUT::update statistics:\n";
std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; // std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n";
std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; // std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; // std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n";
std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; // std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n";
std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n";
std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n";
std::cout << std::flush; // std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n";
// std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n";
// std::cout << std::flush;
} }
template< typename Matrix, typename Real, typename Index > template< typename Matrix, typename Real, typename Index >
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment