Commit e7c80efe authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Optimized ILUT

parent 8ae818f1
Loading
Loading
Loading
Loading
+62 −52
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#pragma once

#include <vector>
#include <set>

#include "ILUT.h"
#include "TriangularSolve.h"
@@ -50,16 +51,14 @@ update( const MatrixPointer& matrixPointer )
   L.setDimensions( N, N );
   U.setDimensions( N, N );

   Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_dropping, timer_copy_into_LU;
//   Timer timer_total, timer_rowlengths, timer_copy_into_w, timer_k_loop, timer_heap_construct, timer_heap_extract, timer_copy_into_LU, timer_reset;

   timer_total.start();
//   timer_total.start();

   // compute row lengths
   timer_rowlengths.start();
   typename decltype(L)::CompressedRowLengthsVector L_rowLengths;
   typename decltype(U)::CompressedRowLengthsVector U_rowLengths;
   L_rowLengths.setSize( N );
   U_rowLengths.setSize( N );
//   timer_rowlengths.start();
   typename decltype(L)::CompressedRowLengthsVector L_rowLengths( N );
   typename decltype(U)::CompressedRowLengthsVector U_rowLengths( N );
   for( IndexType i = 0; i < N; i++ ) {
      const auto row = localMatrix.getRow( i );
      const auto max_length = localMatrix.getRowLength( i );
@@ -82,7 +81,7 @@ update( const MatrixPointer& matrixPointer )
   }
   L.setCompressedRowLengths( L_rowLengths );
   U.setCompressedRowLengths( U_rowLengths );
   timer_rowlengths.stop();
//   timer_rowlengths.stop();

   // intermediate full vector for the i-th row of A
   VectorType w;
@@ -90,7 +89,6 @@ update( const MatrixPointer& matrixPointer )
   w.setValue( 0.0 );

   // intermediate vectors for sorting and keeping only the largest values
//   using Pair = std::pair< IndexType, RealType >;
   struct Triplet {
      IndexType column;
      RealType value;
@@ -102,8 +100,6 @@ update( const MatrixPointer& matrixPointer )
   auto cmp_column = []( const Triplet& a, const Triplet& b ){ return a.column < b.column; };
   std::vector< Triplet > values_L, values_U;

//   std::cout << "N = " << N << std::endl;

   // Incomplete LU factorization with threshold
   // (see Saad - Iterative methods for sparse linear systems, section 10.4)
   for( IndexType i = 0; i < N; i++ ) {
@@ -112,8 +108,11 @@ update( const MatrixPointer& matrixPointer )

      RealType A_i_norm = 0.0;

      // set of indices where w_k is non-zero (i.e. {k: w_k != 0})
      std::set< IndexType > w_k_set;

      // copy A_i into the full vector w
      timer_copy_into_w.start();
//      timer_copy_into_w.start();
      for( IndexType c_j = 0; c_j < max_length; c_j++ ) {
         auto j = A_i.getElementColumn( c_j );
         if( minColumn > 0 ) {
@@ -127,21 +126,22 @@ update( const MatrixPointer& matrixPointer )

         // running computation of norm
         A_i_norm += w[ j ] * w[ j ];

         w_k_set.insert( j );
      }
      timer_copy_into_w.stop();
//      timer_copy_into_w.stop();

      // compute relative tolerance
      A_i_norm = std::sqrt( A_i_norm );
      const RealType tau_i = tau * A_i_norm;

      // loop for k = 0, ..., i - 1; but only over the non-zero entries of w
      timer_k_loop.start();
      for( IndexType k = 0; k < i; k++ ) {
         RealType w_k = w[ k ];
         if( w_k == 0.0 )
            continue;
//      timer_k_loop.start();
      for( const IndexType k : w_k_set ) {
         if( k >= i )
            break;

         w_k /= localMatrix.getElementFast( k, k + minColumn );
         RealType w_k = w[ k ] / localMatrix.getElementFast( k, k + minColumn );

         // apply dropping rule to w_k
         if( std::abs( w_k ) < tau_i )
@@ -155,36 +155,42 @@ update( const MatrixPointer& matrixPointer )
            // loop for j = 0, ..., N-1; but only over the non-zero entries
            for( Index c_j = 0; c_j < U_rowLengths[ N - 1 - k ]; c_j++ ) {
               const auto j = U_k.getElementColumn( c_j );

               // skip dropped entries
               if( j >= N ) break;
               w[ j ] -= w_k * U_k.getElementValue( c_j );

               // add non-zero to the w_k_set
               w_k_set.insert( j );
            }
         }
      }
      timer_k_loop.stop();
//      timer_k_loop.stop();

      // apply dropping rule to the row w
      // (we drop all values under threshold and keep nl(i) + p largest values in L
      // and nu(i) + p largest values in U; see Saad (2003) for reference)
      // TODO: refactoring!!! (use the quick-split strategy, constructing the heap is not necessary)
      timer_dropping.start();
      for( IndexType j = 0; j < N; j++ ) {

      // construct heaps with the values in the L and U parts separately
//      timer_heap_construct.start();
      for( const IndexType j : w_k_set ) {
         const RealType w_j_abs = std::abs( w[ j ] );
         // ignore small values
         if( w_j_abs < tau_i )
            continue;
         // push into the heaps for L or U
         if( j < i ) {
         if( j < i )
            heap_L.push_back( Triplet( j, w[ j ], w_j_abs ) );
            std::push_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
         }
         else {
         else
            heap_U.push_back( Triplet( j, w[ j ], w_j_abs ) );
            std::push_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
         }
      }
      std::make_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
      std::make_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
//      timer_heap_construct.stop();

      // extract values for L and U
      for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < heap_L.size(); c_j++ ) {
//      timer_heap_extract.start();
      for( IndexType c_j = 0; c_j < L_rowLengths[ i ] && c_j < (IndexType) heap_L.size(); c_j++ ) {
         // move the largest to the end
         std::pop_heap( heap_L.begin(), heap_L.end(), cmp_abs_value );
         // move the triplet from one vector into another
@@ -192,7 +198,7 @@ update( const MatrixPointer& matrixPointer )
         heap_L.pop_back();
         values_L.push_back( largest );
      }
      for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < heap_U.size(); c_j++ ) {
      for( IndexType c_j = 0; c_j < U_rowLengths[ N - 1 - i ] && c_j < (IndexType) heap_U.size(); c_j++ ) {
         // move the largest to the end
         std::pop_heap( heap_U.begin(), heap_U.end(), cmp_abs_value );
         // move the triplet from one vector into another
@@ -200,55 +206,59 @@ update( const MatrixPointer& matrixPointer )
         heap_U.pop_back();
         values_U.push_back( largest );
      }
      // sort by column index to make it insertable into the sparse matrix
      std::sort( values_L.begin(), values_L.end(), cmp_column );
      std::sort( values_U.begin(), values_U.end(), cmp_column );
      timer_dropping.stop();
//      timer_heap_extract.stop();

//      std::cout << "i = " << i << ", L_rowLengths[ i ] = " << L_rowLengths[ i ] << ", U_rowLengths[ i ] = " << U_rowLengths[ N - 1 - i ] << std::endl;

      timer_copy_into_LU.start();
//      timer_copy_into_LU.start();

      // sort by column index to make it insertable into the sparse matrix
      std::sort( values_L.begin(), values_L.end(), cmp_column );
      std::sort( values_U.begin(), values_U.end(), cmp_column );

      // the row L_i might be empty
      if( values_L.size() ) {
         // L_ij = w_j for j = 0, ..., i - 1
         auto L_i = L.getRow( i );
         for( IndexType c_j = 0; c_j < values_L.size(); c_j++ ) {
         for( IndexType c_j = 0; c_j < (IndexType) values_L.size(); c_j++ ) {
            const auto j = values_L[ c_j ].column;
//            std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
            L_i.setElement( c_j, j, values_L[ c_j ].value );
         }
      }

      // U_ij = w_j for j = i, ..., N - 1
      auto U_i = U.getRow( N - 1 - i );
      for( IndexType c_j = 0; c_j < values_U.size(); c_j++ ) {
      for( IndexType c_j = 0; c_j < (IndexType) values_U.size(); c_j++ ) {
         const auto j = values_U[ c_j ].column;
//         std::cout << "c_j = " << c_j << ", j = " << j << std::endl;
         U_i.setElement( c_j, j, values_U[ c_j ].value );
      }

      timer_copy_into_LU.stop();
//      timer_copy_into_LU.stop();

      // reset w
      w.setValue( 0.0 );
//      timer_reset.start();
      for( const IndexType j : w_k_set )
         w[ j ] = 0.0;

      heap_L.clear();
      heap_U.clear();
      values_L.clear();
      values_U.clear();
//      timer_reset.stop();
   }

   timer_total.stop();

   std::cout << "ILUT::update statistics:\n";
   std::cout << "\ttimer_total:        " << timer_total.getRealTime()         << " s\n";
   std::cout << "\ttimer_rowlengths:   " << timer_rowlengths.getRealTime()    << " s\n";
   std::cout << "\ttimer_copy_into_w:  " << timer_copy_into_w.getRealTime()   << " s\n";
   std::cout << "\ttimer_k_loop:       " << timer_k_loop.getRealTime()        << " s\n";
   std::cout << "\ttimer_dropping:     " << timer_dropping.getRealTime()      << " s\n";
   std::cout << "\ttimer_copy_into_LU: " << timer_copy_into_LU.getRealTime()  << " s\n";
   std::cout << std::flush;
//   timer_total.stop();

//   std::cout << "ILUT::update statistics:\n";
//   std::cout << "\ttimer_total:           " << timer_total.getRealTime()          << " s\n";
//   std::cout << "\ttimer_rowlengths:      " << timer_rowlengths.getRealTime()     << " s\n";
//   std::cout << "\ttimer_copy_into_w:     " << timer_copy_into_w.getRealTime()    << " s\n";
//   std::cout << "\ttimer_k_loop:          " << timer_k_loop.getRealTime()         << " s\n";
//   std::cout << "\ttimer_heap_construct:  " << timer_heap_construct.getRealTime() << " s\n";
//   std::cout << "\ttimer_heap_extract:    " << timer_heap_extract.getRealTime()   << " s\n";
//   std::cout << "\ttimer_copy_into_LU:    " << timer_copy_into_LU.getRealTime()   << " s\n";
//   std::cout << "\ttimer_reset:           " << timer_reset.getRealTime()          << " s\n";
//   std::cout << std::flush;
}

template< typename Matrix, typename Real, typename Index >