Loading src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h +62 −52 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #pragma once #include <vector> #include <set> #include "ILUT.h" #include "TriangularSolve.h" Loading Loading @@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer ) L.setDimensions( N, N ); U.setDimensions( N, N ); Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU; // Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset; timer_total.start(); // timer_total.start(); // compute row lengths timer_rowlengths.start(); typename decltype(L)::CompressedRowLengthsVector L_rowLengths; typename decltype(U)::CompressedRowLengthsVector U_rowLengths; L_rowLengths.setSize( N ); U_rowLengths.setSize( N ); // timer_rowlengths.start(); typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N ); typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N ); for( IndexType i = 0; i < N; i++ ) { const auto row = localMatrix.getRow( i ); const auto max_length = localMatrix.getRowLength( i ); Loading @@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer ) } L.setCompressedRowLengths( L_rowLengths ); U.setCompressedRowLengths( U_rowLengths ); timer_rowlengths.stop(); // timer_rowlengths.stop(); // intermediate full vector for the i-th row of A VectorType w; Loading @@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer ) w.setValue( 0.0 ); // intermediate vectors for sorting and keeping only the largest values // using Pair = std::pair< IndexType, RealType >; struct Triplet { IndexType column; RealType value; Loading @@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer ) auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; }; std::vector< Triplet > values_L, values_U; // std::cout << "N = " << N << std::endl; // Incomplete LU factorization with threshold // (see Saad - Iterative methods for sparse linear systems, section 10.4) for( IndexType i = 0; i < N; i++ ) { Loading @@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer ) RealType A_i_norm = 0.0; // set of indices where w_k is non-zero (i.e. {k: w_k != 0}) std::set< IndexType > w_k_set; // copy A_i into the full vector w timer_copy_into_w.start(); // timer_copy_into_w.start(); for( IndexType c_j = 0; c_j < max_length; c_j++ ) { auto j = A_i.getElementColumn( c_j ); if( minColumn > 0 ) { Loading @@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer ) // running computation of norm A_i_norm += w[ j ] * w[ j ]; w_k_set.insert( j ); } timer_copy_into_w.stop(); // timer_copy_into_w.stop(); // compute relative tolerance A_i_norm = std::sqrt( A_i_norm ); const RealType tau_i = tau * A_i_norm; // loop for k = 0, ..., i - 1; but only over the non-zero entries of w timer_k_loop.start(); for( IndexType k = 0; k < i; k++ ) { RealType w_k = w[ k ]; if( w_k == 0.0 ) continue; // timer_k_loop.start(); for( const IndexType k : w_k_set ) { if( k >= i ) break; w_k /= localMatrix.getElementFast( k, k + minColumn ); RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn ); // apply dropping rule to w_k if( std::abs( w_k ) < tau_i ) Loading @@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer ) // loop for j = 0, ..., N-1; but only over the non-zero entries for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) { const auto j = U_k.getElementColumn( c_j ); // skip dropped entries if( j >= N ) break; w[ j ] -= w_k * U_k.getElementValue( c_j ); // add non-zero to the w_k_set w_k_set.insert( j ); } } } timer_k_loop.stop(); // timer_k_loop.stop(); // apply dropping rule to the row w // (we drop all values under threshold and keep nl(i) + p largest values in L // and nu(i) + p largest values in U; see Saad (2003) for reference) // TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary) timer_dropping.start(); for( IndexType j = 0; j < N; j++ ) { // construct heaps with the values in the L and U parts separately // timer_heap_construct.start(); for( const IndexType j : w_k_set ) { const RealType w_j_abs = std::abs( w[ j ] ); // ignore small values if( w_j_abs < tau_i ) continue; // push into the heaps for L or U if( j < i ) { if( j < i ) heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) ); std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); } else { else heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) ); std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); } } std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); // timer_heap_construct.stop(); // extract values for L and U for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) { // timer_heap_extract.start(); for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); // move the triplet from one vector into another Loading @@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer ) heap_L.pop_back(); values_L.push_back( largest ); } for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); // move the triplet from one vector into another Loading @@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer ) heap_U.pop_back(); values_U.push_back( largest ); } // sort by column index to make it insertable into the sparse matrix std::sort( values_L.begin(), values_L.end(), cmp_column ); std::sort( values_U.begin(), values_U.end(), cmp_column ); timer_dropping.stop(); // timer_heap_extract.stop(); // std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl; timer_copy_into_LU.start(); // timer_copy_into_LU.start(); // sort by column index to make it insertable into the sparse matrix std::sort( values_L.begin(), values_L.end(), cmp_column ); std::sort( values_U.begin(), values_U.end(), cmp_column ); // the row L_i might be empty if( values_L.size() ) { // L_ij = w_j for j = 0, ..., i - 1 auto L_i = L.getRow( i ); for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) { const auto j = values_L[ c_j ].column; // std::cout << "c_j = " << c_j << ", j = " << j << std::endl; L_i.setElement( c_j, j, values_L[ c_j ].value ); } } // U_ij = w_j for j = i, ..., N - 1 auto U_i = U.getRow( N - 1 - i ); for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) { const auto j = values_U[ c_j ].column; // std::cout << "c_j = " << c_j << ", j = " << j << std::endl; U_i.setElement( c_j, j, values_U[ c_j ].value ); } timer_copy_into_LU.stop(); // timer_copy_into_LU.stop(); // reset w w.setValue( 0.0 ); // timer_reset.start(); for( const IndexType j : w_k_set ) w[ j ] = 0.0; heap_L.clear(); heap_U.clear(); values_L.clear(); values_U.clear(); // timer_reset.stop(); } timer_total.stop(); std::cout << "ILUT::update statistics:\n"; std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n"; std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; std::cout << std::flush; // timer_total.stop(); // std::cout << "ILUT::update statistics:\n"; // std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; // std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; // std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; // std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n"; // std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; // std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n"; // std::cout << std::flush; } template< typename Matrix, typename Real, typename Index > Loading Loading
src/TNL/Solvers/Linear/Preconditioners/ILUT_impl.h +62 −52 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #pragma once #include <vector> #include <set> #include "ILUT.h" #include "TriangularSolve.h" Loading Loading @@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer ) L.setDimensions( N, N ); U.setDimensions( N, N ); Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU; // Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset; timer_total.start(); // timer_total.start(); // compute row lengths timer_rowlengths.start(); typename decltype(L)::CompressedRowLengthsVector L_rowLengths; typename decltype(U)::CompressedRowLengthsVector U_rowLengths; L_rowLengths.setSize( N ); U_rowLengths.setSize( N ); // timer_rowlengths.start(); typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N ); typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N ); for( IndexType i = 0; i < N; i++ ) { const auto row = localMatrix.getRow( i ); const auto max_length = localMatrix.getRowLength( i ); Loading @@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer ) } L.setCompressedRowLengths( L_rowLengths ); U.setCompressedRowLengths( U_rowLengths ); timer_rowlengths.stop(); // timer_rowlengths.stop(); // intermediate full vector for the i-th row of A VectorType w; Loading @@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer ) w.setValue( 0.0 ); // intermediate vectors for sorting and keeping only the largest values // using Pair = std::pair< IndexType, RealType >; struct Triplet { IndexType column; RealType value; Loading @@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer ) auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; }; std::vector< Triplet > values_L, values_U; // std::cout << "N = " << N << std::endl; // Incomplete LU factorization with threshold // (see Saad - Iterative methods for sparse linear systems, section 10.4) for( IndexType i = 0; i < N; i++ ) { Loading @@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer ) RealType A_i_norm = 0.0; // set of indices where w_k is non-zero (i.e. {k: w_k != 0}) std::set< IndexType > w_k_set; // copy A_i into the full vector w timer_copy_into_w.start(); // timer_copy_into_w.start(); for( IndexType c_j = 0; c_j < max_length; c_j++ ) { auto j = A_i.getElementColumn( c_j ); if( minColumn > 0 ) { Loading @@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer ) // running computation of norm A_i_norm += w[ j ] * w[ j ]; w_k_set.insert( j ); } timer_copy_into_w.stop(); // timer_copy_into_w.stop(); // compute relative tolerance A_i_norm = std::sqrt( A_i_norm ); const RealType tau_i = tau * A_i_norm; // loop for k = 0, ..., i - 1; but only over the non-zero entries of w timer_k_loop.start(); for( IndexType k = 0; k < i; k++ ) { RealType w_k = w[ k ]; if( w_k == 0.0 ) continue; // timer_k_loop.start(); for( const IndexType k : w_k_set ) { if( k >= i ) break; w_k /= localMatrix.getElementFast( k, k + minColumn ); RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn ); // apply dropping rule to w_k if( std::abs( w_k ) < tau_i ) Loading @@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer ) // loop for j = 0, ..., N-1; but only over the non-zero entries for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) { const auto j = U_k.getElementColumn( c_j ); // skip dropped entries if( j >= N ) break; w[ j ] -= w_k * U_k.getElementValue( c_j ); // add non-zero to the w_k_set w_k_set.insert( j ); } } } timer_k_loop.stop(); // timer_k_loop.stop(); // apply dropping rule to the row w // (we drop all values under threshold and keep nl(i) + p largest values in L // and nu(i) + p largest values in U; see Saad (2003) for reference) // TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary) timer_dropping.start(); for( IndexType j = 0; j < N; j++ ) { // construct heaps with the values in the L and U parts separately // timer_heap_construct.start(); for( const IndexType j : w_k_set ) { const RealType w_j_abs = std::abs( w[ j ] ); // ignore small values if( w_j_abs < tau_i ) continue; // push into the heaps for L or U if( j < i ) { if( j < i ) heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) ); std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); } else { else heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) ); std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); } } std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); // timer_heap_construct.stop(); // extract values for L and U for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) { // timer_heap_extract.start(); for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value ); // move the triplet from one vector into another Loading @@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer ) heap_L.pop_back(); values_L.push_back( largest ); } for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) { // move the largest to the end std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value ); // move the triplet from one vector into another Loading @@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer ) heap_U.pop_back(); values_U.push_back( largest ); } // sort by column index to make it insertable into the sparse matrix std::sort( values_L.begin(), values_L.end(), cmp_column ); std::sort( values_U.begin(), values_U.end(), cmp_column ); timer_dropping.stop(); // timer_heap_extract.stop(); // std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl; timer_copy_into_LU.start(); // timer_copy_into_LU.start(); // sort by column index to make it insertable into the sparse matrix std::sort( values_L.begin(), values_L.end(), cmp_column ); std::sort( values_U.begin(), values_U.end(), cmp_column ); // the row L_i might be empty if( values_L.size() ) { // L_ij = w_j for j = 0, ..., i - 1 auto L_i = L.getRow( i ); for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) { const auto j = values_L[ c_j ].column; // std::cout << "c_j = " << c_j << ", j = " << j << std::endl; L_i.setElement( c_j, j, values_L[ c_j ].value ); } } // U_ij = w_j for j = i, ..., N - 1 auto U_i = U.getRow( N - 1 - i ); for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) { for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) { const auto j = values_U[ c_j ].column; // std::cout << "c_j = " << c_j << ", j = " << j << std::endl; U_i.setElement( c_j, j, values_U[ c_j ].value ); } timer_copy_into_LU.stop(); // timer_copy_into_LU.stop(); // reset w w.setValue( 0.0 ); // timer_reset.start(); for( const IndexType j : w_k_set ) w[ j ] = 0.0; heap_L.clear(); heap_U.clear(); values_L.clear(); values_U.clear(); // timer_reset.stop(); } timer_total.stop(); std::cout << "ILUT::update statistics:\n"; std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; std::cout << "\ttimer_dropping: " << timer_dropping.getRealTime() << " s\n"; std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; std::cout << std::flush; // timer_total.stop(); // std::cout << "ILUT::update statistics:\n"; // std::cout << "\ttimer_total: " << timer_total.getRealTime() << " s\n"; // std::cout << "\ttimer_rowlengths: " << timer_rowlengths.getRealTime() << " s\n"; // std::cout << "\ttimer_copy_into_w: " << timer_copy_into_w.getRealTime() << " s\n"; // std::cout << "\ttimer_k_loop: " << timer_k_loop.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_construct: " << timer_heap_construct.getRealTime() << " s\n"; // std::cout << "\ttimer_heap_extract: " << timer_heap_extract.getRealTime() << " s\n"; // std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime() << " s\n"; // std::cout << "\ttimer_reset: " << timer_reset.getRealTime() << " s\n"; // std::cout << std::flush; } template< typename Matrix, typename Real, typename Index > Loading