diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h index 67d4aa627f0d03f9945b64956465623c20062824..a1735e9e6de16e1d2804e9524462e5094cc628d1 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h @@ -13,6 +13,7 @@ #pragma once #include <vector> +#include <set> #include "ILUT.h" #include "TriangularSolve.h" @@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer ) L.setDimensions( N, N ); U.setDimensions( N, N ); - Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU; +// Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset; - timer_total.start(); +// timer_total.start(); // compute row lengths - timer_rowlengths.start(); - typename decltype(L)::CompressedRowLengthsVector L_rowLengths; - typename decltype(U)::CompressedRowLengthsVector U_rowLengths; - L_rowLengths.setSize( N ); - U_rowLengths.setSize( N ); +// timer_rowlengths.start(); + typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N ); + typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N ); for( IndexType i = 0; i < N; i++ ) { const auto row = localMatrix.getRow( i ); const auto max_length = localMatrix.getRowLength( i ); @@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer ) } L.setCompressedRowLengths( L_rowLengths ); U.setCompressedRowLengths( U_rowLengths ); - timer_rowlengths.stop(); +// timer_rowlengths.stop(); // intermediate full vector for the i-th row of A VectorType w; @@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer ) w.setValue( 0.0 ); // intermediate vectors for sorting and keeping only the largest values -// using Pair = std::pair< IndexType, RealType >; struct Triplet { IndexType column; RealType value; @@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer ) auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; }; std::vector< Triplet > values_L, values_U; -// std::cout << "N = " << N << std::endl; - // Incomplete LU factorization with threshold // (see Saad - Iterative methods for sparse linear systems, section 10.4) for( IndexType i = 0; i < N; i++ ) { @@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer ) RealType A_i_norm = 0.0; + // set of indices where w_k is non-zero (i.e. {k: w_k != 0}) + std::set< IndexType > w_k_set; + // copy A_i into the full vector w - timer_copy_into_w.start(); +// timer_copy_into_w.start(); for( IndexType c_j = 0; c_j < max_length; c_j++ ) { auto j = A_i.getElementColumn( c_j ); if( minColumn > 0 ) { @@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer ) // running computation of norm A_i_norm += w[ j ] * w[ j ]; + + w_k_set.insert( j ); } - timer_copy_into_w.stop(); +// timer_copy_into_w.stop(); // compute relative tolerance A_i_norm = std::sqrt( A_i_norm ); const RealType tau_i = tau * A_i_norm; // loop for k = 0, ..., i - 1; but only over the non-zero entries of w - timer_k_loop.start(); - for( IndexType k = 0; k < i; k++ ) { - RealType w_k = w[ k ]; - if( w_k == 0.0 ) - continue; +// timer_k_loop.start(); + for( const IndexType k : w_k_set ) { + if( k >= i ) + break; - w_k /= localMatrix.getElementFast( k, k + minColumn ); + RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn ); // apply dropping rule to w_k if( std::abs( w_k ) < tau_i ) @@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer ) // loop for j = 0, ..., N-1; but only over the non-zero entries for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) { const auto j = U_k.getElementColumn( c_j ); + // skip dropped entries if( j >= N ) break; w[ j ] -= w_k * U_k.getElementValue( c_j ); + + // add non-zero to the w_k_set + w_k_set.insert( j ); } } } - timer_k_loop.stop(); +// timer_k_loop.stop(); // apply dropping rule to the row w // (we drop all values under threshold and keep nl(i) + p largest values in L // and nu(i) + p largest values in U; see Saad (2003) for reference) - // TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary) - timer_dropping.start(); - for( IndexType j = 0; j < N; j++ ) { + + // construct heaps with the values in the L and U parts separately +// timer_heap_construct.start(); + for( const IndexType j : w_k_set ) { const RealType w_j_abs = std::abs( w[ j ] ); // ignore small values if( w_j_abs < tau_i ) continue; // push into the heaps for L or U - if( j < i ) { + if( j < i ) heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) ); - std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); - } - else { + else heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) ); - std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); - } } + std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); + std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); +// timer_heap_construct.stop(); + // extract values for L and U - for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) { +// timer_heap_extract.start(); + for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); // move the triplet from one vector into another @@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer ) heap_L.pop_back(); values_L.push_back( largest ); } - for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) { + for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); // move the triplet from one vector into another @@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer ) heap_U.pop_back(); values_U.push_back( largest ); } - // sort by column index to make it insertable into the sparse matrix - std::sort( values_L.begin(), values_L.end(), cmp_column ); - std::sort( values_U.begin(), values_U.end(), cmp_column ); - timer_dropping.stop(); +// timer_heap_extract.stop(); // std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl; - timer_copy_into_LU.start(); +// timer_copy_into_LU.start(); + + // sort by column index to make it insertable into the sparse matrix + std::sort( values_L.begin(), values_L.end(), cmp_column ); + std::sort( values_U.begin(), values_U.end(), cmp_column ); // the row L_i might be empty if( values_L.size() ) { // L_ij = w_j for j = 0, ..., i - 1 auto L_i = L.getRow( i ); - for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) { + for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) { const auto j = values_L[ c_j ].column; -// std::cout << "c_j = " << c_j << ", j = " << j << std::endl; L_i.setElement( c_j, j, values_L[ c_j ].value ); } } // U_ij = w_j for j = i, ..., N - 1 auto U_i = U.getRow( N - 1 - i ); - for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) { + for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) { const auto j = values_U[ c_j ].column; -// std::cout << "c_j = " << c_j << ", j = " << j << std::endl; U_i.setElement( c_j, j, values_U[ c_j ].value ); } - timer_copy_into_LU.stop(); +// timer_copy_into_LU.stop(); // reset w - w.setValue( 0.0 ); +// timer_reset.start(); + for( const IndexType j : w_k_set ) + w[ j ] = 0.0; heap_L.clear(); heap_U.clear(); values_L.clear(); values_U.clear(); +// timer_reset.stop(); } - timer_total.stop(); - - std::cout << "ILUT::update statistics:\n"; - std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; - std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; - std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; - std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; - std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n"; - std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; - std::cout << std::flush; +// timer_total.stop(); + +// std::cout << "ILUT::update statistics:\n"; +// std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; +// std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; +// std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; +// std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; +// std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n"; +// std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n"; +// std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; +// std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n"; +// std::cout << std::flush; } template< typename Matrix, typename Real, typename Index >