Loading src/TNL/Solvers/Linear/CWYGMRES.h +1 −1 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ protected: // host-only storage for Givens rotations and the least squares problem HostVector cs, sn, H, s; IndexType size, ldSize, restarting; IndexType size, ldSize, restarting_min, restarting_max, restarting_step_min, restarting_step_max; MatrixPointer matrix; PreconditionerPointer preconditioner; Loading src/TNL/Solvers/Linear/CWYGMRES_impl.h +72 −28 Original line number Diff line number Diff line Loading @@ -29,7 +29,10 @@ CWYGMRES< Matrix, Preconditioner >:: CWYGMRES() : size( 0 ), ldSize( 0 ), restarting( 10 ) restarting_min( 10 ), restarting_max( 10 ), restarting_step_min( 3 ), restarting_step_max( 3 ) { /**** * Clearing the shared pointer means that there is no Loading Loading @@ -64,7 +67,10 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { //IterativeSolver< RealType, IndexType >::configSetup( config, prefix ); config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the CWYGMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); } template< typename Matrix, Loading @@ -75,7 +81,10 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { IterativeSolver< RealType, IndexType >::setup( parameters, prefix ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) ); restarting_min = parameters.getParameter< int >( "gmres-restarting-min" ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) ); restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" ); restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" ); return true; } Loading @@ -87,7 +96,7 @@ setRestarting( IndexType rest ) { if( size != 0 ) setSize( size, rest ); restarting = rest; restarting_max = rest; } template< typename Matrix, Loading Loading @@ -116,13 +125,19 @@ CWYGMRES< Matrix, Preconditioner >:: solve( const Vector& b, Vector& x ) { TNL_ASSERT( matrix, std::cerr << "No matrix was set in CWYGMRES. Call setMatrix() before solve()." << std::endl ); if( restarting <= 0 ) if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max ) { std::cerr << "I have wrong value for the restarting of the CWYGMRES solver. It is set to " << restarting << ". Please set some positive value using the SetRestarting method." << std::endl; std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min << ", r_max = " << restarting_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting ) ) if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max ) { std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min << ", d_max = " << restarting_step_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting_max ) ) { std::cerr << "I am not able to allocate enough memory for the CWYGMRES solver. You may try to decrease the restarting parameter." << std::endl; return false; Loading Loading @@ -160,10 +175,35 @@ solve( const Vector& b, Vector& x ) this->resetIterations(); this->setResidue( beta / normb ); // parameters for the adaptivity of the restarting parameter RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate) const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99 const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175 int restart_cycles = 0; // counter of restart cycles int m = restarting_max; // current restarting parameter DeviceVector vi, vk; while( this->checkNextIteration() ) { const IndexType m = restarting; // adaptivity of the restarting parameter if( restart_cycles > 0 ) { if( beta_ratio > max_beta_ratio ) // near stagnation -> set maximum m = restarting_max; else if( beta_ratio >= min_beta_ratio ) { // the step size is determined based on current m using linear interpolation // between restarting_step_min and restarting_step_max const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) / ( restarting_max - restarting_min ) * ( m - restarting_min ); if( m - step >= restarting_min ) m -= step; else // set restarting_max when we hit restarting_min (see Baker et al. (2009)) m = restarting_max; } // std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl; } /*** * z = r / | r | = 1.0 / beta * r Loading Loading @@ -295,6 +335,7 @@ solve( const Vector& b, Vector& x ) /**** * r = M.solve(b - A * x); */ const RealType beta_old = beta; if( preconditioner ) { matrix->vectorProduct( x, _M_tmp ); Loading @@ -313,6 +354,9 @@ solve( const Vector& b, Vector& x ) // cout << " beta = " << beta << endl; // cout << "residue = " << beta / normb << endl; // update parameters for the adaptivity of the restarting parameter ++restart_cycles; beta_ratio = beta / beta_old; } this->refreshSolverMonitor( true ); return this->checkConvergence(); Loading Loading @@ -401,7 +445,7 @@ hauseholder_generate( DeviceVector& Y, // assuming it's stable enough... const RealType t_i = 2.0 / (norm_yi * norm_yi); T[ i + i * (restarting + 1) ] = t_i; T[ i + i * (restarting_max + 1) ] = t_i; if( i > 0 ) { // aux = Y_{i-1}^T * y_i RealType aux[ i ]; Loading @@ -421,9 +465,9 @@ hauseholder_generate( DeviceVector& Y, // [T_i]_{0..i-1} = - T_{i-1} * t_i * aux for( int k = 0; k < i; k++ ) { T[ k + i * (restarting + 1) ] = 0.0; T[ k + i * (restarting_max + 1) ] = 0.0; for( int j = k; j < i; j++ ) T[ k + i * (restarting + 1) ] -= T[ k + j * (restarting + 1) ] * (t_i * aux[ j ]); T[ k + i * (restarting_max + 1) ] -= T[ k + j * (restarting_max + 1) ] * (t_i * aux[ j ]); } } } Loading @@ -441,7 +485,7 @@ hauseholder_apply_trunc( HostVector& out, DeviceVector y_i; y_i.bind( &Y.getData()[ i * ldSize ], size ); const RealType aux = T[ i + i * (restarting + 1) ] * y_i.scalarProduct( z ); const RealType aux = T[ i + i * (restarting_max + 1) ] * y_i.scalarProduct( z ); if( std::is_same< DeviceType, Devices::Host >::value ) { for( int k = 0; k <= i; k++ ) out[ k ] = z[ k ] - y_i[ k ] * aux; Loading @@ -449,9 +493,9 @@ hauseholder_apply_trunc( HostVector& out, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // copy part of y_i to buffer on host // here we duplicate the upper (m+1)x(m+1) submatrix of Y on host for fast access RealType* host_yi = &YL[ i * (restarting + 1) ]; RealType* host_yi = &YL[ i * (restarting_max + 1) ]; RealType host_z[ i + 1 ]; if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting + 1 ) || if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ) || ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ) ) { std::cerr << "Failed to copy part of device vectors y_i or z to host buffer." << std::endl; Loading Loading @@ -480,7 +524,7 @@ hauseholder_cwy( DeviceVector& v, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // the upper (m+1)x(m+1) submatrix of Y is duplicated on host for fast access for( int k = 0; k <= i; k++ ) aux[ k ] = YL[ i + k * (restarting + 1) ]; aux[ k ] = YL[ i + k * (restarting_max + 1) ]; } // aux = T_i * aux Loading @@ -488,7 +532,7 @@ hauseholder_cwy( DeviceVector& v, for( int k = 0; k <= i; k++ ) { RealType aux2 = 0.0; for( int j = k; j <= i; j++ ) aux2 += T[ k + j * (restarting + 1) ] * aux[ j ]; aux2 += T[ k + j * (restarting_max + 1) ] * aux[ j ]; aux[ k ] = aux2; } Loading Loading @@ -530,7 +574,7 @@ hauseholder_cwy_transposed( DeviceVector& z, for( int k = i; k >= 0; k-- ) { RealType aux2 = 0.0; for( int j = 0; j <= k; j++ ) aux2 += T[ j + k * (restarting + 1) ] * aux[ j ]; aux2 += T[ j + k * (restarting_max + 1) ] * aux[ j ]; aux[ k ] = aux2; } Loading Loading @@ -631,7 +675,7 @@ template< typename Matrix, typename Preconditioner > bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m ) { if( size == _size && restarting == m ) return true; if( size == _size && restarting_max == m ) return true; size = _size; if( std::is_same< DeviceType, Devices::Cuda >::value ) // align each column to 256 bytes - optimal for CUDA Loading @@ -639,18 +683,18 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m else // on the host, we add 1 to disrupt the cache false-sharing pattern ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1; restarting = m; restarting_max = m; if( ! r.setSize( size ) || ! z.setSize( size ) || ! w.setSize( size ) || ! V.setSize( ldSize * ( restarting + 1 ) ) || ! Y.setSize( ldSize * ( restarting + 1 ) ) || ! T.setSize( (restarting + 1) * (restarting + 1) ) || ! YL.setSize( (restarting + 1) * (restarting + 1) ) || ! cs.setSize( restarting + 1 ) || ! sn.setSize( restarting + 1 ) || ! H.setSize( ( restarting + 1 ) * restarting ) || ! s.setSize( restarting + 1 ) || ! V.setSize( ldSize * ( m + 1 ) ) || ! Y.setSize( ldSize * ( m + 1 ) ) || ! T.setSize( (m + 1) * (m + 1) ) || ! YL.setSize( (m + 1) * (m + 1) ) || ! cs.setSize( m + 1 ) || ! sn.setSize( m + 1 ) || ! H.setSize( ( m + 1 ) * m ) || ! s.setSize( m + 1 ) || ! _M_tmp.setSize( size ) ) { std::cerr << "I could not allocate all supporting arrays for the CWYGMRES solver." << std::endl; Loading src/TNL/Solvers/Linear/GMRES.h +1 −1 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ protected: Containers::Vector< RealType, DeviceType, IndexType > _r, w, _v, _M_tmp; Containers::Vector< RealType, Devices::Host, IndexType > _s, _cs, _sn, _H; IndexType size, restarting; IndexType size, restarting_min, restarting_max, restarting_step_min, restarting_step_max; MatrixPointer matrix; Loading src/TNL/Solvers/Linear/GMRES_impl.h +61 −16 Original line number Diff line number Diff line Loading @@ -21,7 +21,10 @@ template< typename Matrix, GMRES< Matrix, Preconditioner >:: GMRES() : size( 0 ), restarting( 10 ) restarting_min( 10 ), restarting_max( 10 ), restarting_step_min( 3 ), restarting_step_max( 3 ) { /**** * Clearing the shared pointer means that there is no Loading Loading @@ -56,7 +59,10 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { //IterativeSolver< RealType, IndexType >::configSetup( config, prefix ); config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); } template< typename Matrix, Loading @@ -67,7 +73,10 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { IterativeSolver< RealType, IndexType >::setup( parameters, prefix ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) ); restarting_min = parameters.getParameter< int >( "gmres-restarting-min" ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) ); restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" ); restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" ); return true; } Loading @@ -79,7 +88,7 @@ setRestarting( IndexType rest ) { if( size != 0 ) setSize( size, rest ); restarting = rest; restarting_max = rest; } template< typename Matrix, Loading Loading @@ -108,13 +117,19 @@ GMRES< Matrix, Preconditioner >:: solve( const Vector& b, Vector& x ) { TNL_ASSERT( matrix, std::cerr << "No matrix was set in GMRES. Call setMatrix() before solve()." << std::endl ); if( restarting <= 0 ) if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max ) { std::cerr << "I have wrong value for the restarting of the GMRES solver. It is set to " << restarting << ". Please set some positive value using the SetRestarting method." << std::endl; std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min << ", r_max = " << restarting_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting ) ) if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max ) { std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min << ", d_max = " << restarting_step_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting_max ) ) { std::cerr << "I am not able to allocate enough memory for the GMRES solver. You may try to decrease the restarting parameter." << std::endl; return false; Loading Loading @@ -161,10 +176,36 @@ solve( const Vector& b, Vector& x ) this->resetIterations(); this->setResidue( beta / normb ); // parameters for the adaptivity of the restarting parameter RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate) const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99 const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175 int restart_cycles = 0; // counter of restart cycles int m = restarting_max; // current restarting parameter Containers::Vector< RealType, DeviceType, IndexType > vi, vk; while( this->checkNextIteration() ) { const IndexType m = restarting; // adaptivity of the restarting parameter if( restarting_max > restarting_min && restart_cycles > 0 ) { if( beta_ratio > max_beta_ratio ) // near stagnation -> set maximum m = restarting_max; else if( beta_ratio >= min_beta_ratio ) { // the step size is determined based on current m using linear interpolation // between restarting_step_min and restarting_step_max const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) / ( restarting_max - restarting_min ) * ( m - restarting_min ); if( m - step >= restarting_min ) m -= step; else // set restarting_max when we hit restarting_min (see Baker et al. (2009)) m = restarting_max; } // std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl; } for( IndexType i = 0; i < m + 1; i ++ ) H[ i ] = s[ i ] = cs[ i ] = sn[ i ] = 0.0; Loading Loading @@ -279,6 +320,7 @@ solve( const Vector& b, Vector& x ) /**** * r = M.solve(b - A * x); */ const RealType beta_old = beta; beta = 0.0; if( preconditioner ) { Loading @@ -299,6 +341,9 @@ solve( const Vector& b, Vector& x ) //cout << " beta = " << beta << std::endl; //cout << "residue = " << beta / normb << std::endl; // update parameters for the adaptivity of the restarting parameter ++restart_cycles; beta_ratio = beta / beta_old; } this->refreshSolverMonitor( true ); return this->checkConvergence(); Loading Loading @@ -389,16 +434,16 @@ bool GMRES< Matrix, Preconditioner >:: setSize( IndexType _size, IndexType m ) { if( size == _size && restarting == m ) return true; if( size == _size && restarting_max == m ) return true; size = _size; restarting = m; restarting_max = m; if( ! _r.setSize( size ) || ! w.setSize( size ) || ! _s.setSize( restarting + 1 ) || ! _cs.setSize( restarting + 1 ) || ! _sn.setSize( restarting + 1 ) || ! _v.setSize( size * ( restarting + 1 ) ) || ! _H.setSize( ( restarting + 1 ) * restarting ) || ! _s.setSize( m + 1 ) || ! _cs.setSize( m + 1 ) || ! _sn.setSize( m + 1 ) || ! _v.setSize( size * ( m + 1 ) ) || ! _H.setSize( ( m + 1 ) * m ) || ! _M_tmp.setSize( size ) ) { std::cerr << "I could not allocate all supporting arrays for the GMRES solver." << std::endl; Loading src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h +6 −2 Original line number Diff line number Diff line Loading @@ -141,8 +141,12 @@ writeProlog( Logger& logger, logger.writeParameter< double >( "Adaptivity:", "merson-adaptivity", parameters, 1 ); if( solverName == "sor" ) logger.writeParameter< double >( "Omega:", "sor-omega", parameters, 1 ); if( solverName == "gmres" ) logger.writeParameter< int >( "Restarting:", "gmres-restarting", parameters, 1 ); if( solverName == "gmres" || solverName == "cwygmres" ) { logger.writeParameter< int >( "Restarting min:", "gmres-restarting-min", parameters, 1 ); logger.writeParameter< int >( "Restarting max:", "gmres-restarting-max", parameters, 1 ); logger.writeParameter< int >( "Restarting step min:", "gmres-restarting-step-min", parameters, 1 ); logger.writeParameter< int >( "Restarting step max:", "gmres-restarting-step-max", parameters, 1 ); } logger.writeParameter< double >( "Convergence residue:", "convergence-residue", parameters ); logger.writeParameter< double >( "Divergence residue:", "divergence-residue", parameters ); logger.writeParameter< int >( "Maximal number of iterations:", "max-iterations", parameters ); Loading Loading
src/TNL/Solvers/Linear/CWYGMRES.h +1 −1 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ protected: // host-only storage for Givens rotations and the least squares problem HostVector cs, sn, H, s; IndexType size, ldSize, restarting; IndexType size, ldSize, restarting_min, restarting_max, restarting_step_min, restarting_step_max; MatrixPointer matrix; PreconditionerPointer preconditioner; Loading
src/TNL/Solvers/Linear/CWYGMRES_impl.h +72 −28 Original line number Diff line number Diff line Loading @@ -29,7 +29,10 @@ CWYGMRES< Matrix, Preconditioner >:: CWYGMRES() : size( 0 ), ldSize( 0 ), restarting( 10 ) restarting_min( 10 ), restarting_max( 10 ), restarting_step_min( 3 ), restarting_step_max( 3 ) { /**** * Clearing the shared pointer means that there is no Loading Loading @@ -64,7 +67,10 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { //IterativeSolver< RealType, IndexType >::configSetup( config, prefix ); config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the CWYGMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); } template< typename Matrix, Loading @@ -75,7 +81,10 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { IterativeSolver< RealType, IndexType >::setup( parameters, prefix ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) ); restarting_min = parameters.getParameter< int >( "gmres-restarting-min" ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) ); restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" ); restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" ); return true; } Loading @@ -87,7 +96,7 @@ setRestarting( IndexType rest ) { if( size != 0 ) setSize( size, rest ); restarting = rest; restarting_max = rest; } template< typename Matrix, Loading Loading @@ -116,13 +125,19 @@ CWYGMRES< Matrix, Preconditioner >:: solve( const Vector& b, Vector& x ) { TNL_ASSERT( matrix, std::cerr << "No matrix was set in CWYGMRES. Call setMatrix() before solve()." << std::endl ); if( restarting <= 0 ) if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max ) { std::cerr << "I have wrong value for the restarting of the CWYGMRES solver. It is set to " << restarting << ". Please set some positive value using the SetRestarting method." << std::endl; std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min << ", r_max = " << restarting_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting ) ) if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max ) { std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min << ", d_max = " << restarting_step_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting_max ) ) { std::cerr << "I am not able to allocate enough memory for the CWYGMRES solver. You may try to decrease the restarting parameter." << std::endl; return false; Loading Loading @@ -160,10 +175,35 @@ solve( const Vector& b, Vector& x ) this->resetIterations(); this->setResidue( beta / normb ); // parameters for the adaptivity of the restarting parameter RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate) const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99 const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175 int restart_cycles = 0; // counter of restart cycles int m = restarting_max; // current restarting parameter DeviceVector vi, vk; while( this->checkNextIteration() ) { const IndexType m = restarting; // adaptivity of the restarting parameter if( restart_cycles > 0 ) { if( beta_ratio > max_beta_ratio ) // near stagnation -> set maximum m = restarting_max; else if( beta_ratio >= min_beta_ratio ) { // the step size is determined based on current m using linear interpolation // between restarting_step_min and restarting_step_max const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) / ( restarting_max - restarting_min ) * ( m - restarting_min ); if( m - step >= restarting_min ) m -= step; else // set restarting_max when we hit restarting_min (see Baker et al. (2009)) m = restarting_max; } // std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl; } /*** * z = r / | r | = 1.0 / beta * r Loading Loading @@ -295,6 +335,7 @@ solve( const Vector& b, Vector& x ) /**** * r = M.solve(b - A * x); */ const RealType beta_old = beta; if( preconditioner ) { matrix->vectorProduct( x, _M_tmp ); Loading @@ -313,6 +354,9 @@ solve( const Vector& b, Vector& x ) // cout << " beta = " << beta << endl; // cout << "residue = " << beta / normb << endl; // update parameters for the adaptivity of the restarting parameter ++restart_cycles; beta_ratio = beta / beta_old; } this->refreshSolverMonitor( true ); return this->checkConvergence(); Loading Loading @@ -401,7 +445,7 @@ hauseholder_generate( DeviceVector& Y, // assuming it's stable enough... const RealType t_i = 2.0 / (norm_yi * norm_yi); T[ i + i * (restarting + 1) ] = t_i; T[ i + i * (restarting_max + 1) ] = t_i; if( i > 0 ) { // aux = Y_{i-1}^T * y_i RealType aux[ i ]; Loading @@ -421,9 +465,9 @@ hauseholder_generate( DeviceVector& Y, // [T_i]_{0..i-1} = - T_{i-1} * t_i * aux for( int k = 0; k < i; k++ ) { T[ k + i * (restarting + 1) ] = 0.0; T[ k + i * (restarting_max + 1) ] = 0.0; for( int j = k; j < i; j++ ) T[ k + i * (restarting + 1) ] -= T[ k + j * (restarting + 1) ] * (t_i * aux[ j ]); T[ k + i * (restarting_max + 1) ] -= T[ k + j * (restarting_max + 1) ] * (t_i * aux[ j ]); } } } Loading @@ -441,7 +485,7 @@ hauseholder_apply_trunc( HostVector& out, DeviceVector y_i; y_i.bind( &Y.getData()[ i * ldSize ], size ); const RealType aux = T[ i + i * (restarting + 1) ] * y_i.scalarProduct( z ); const RealType aux = T[ i + i * (restarting_max + 1) ] * y_i.scalarProduct( z ); if( std::is_same< DeviceType, Devices::Host >::value ) { for( int k = 0; k <= i; k++ ) out[ k ] = z[ k ] - y_i[ k ] * aux; Loading @@ -449,9 +493,9 @@ hauseholder_apply_trunc( HostVector& out, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // copy part of y_i to buffer on host // here we duplicate the upper (m+1)x(m+1) submatrix of Y on host for fast access RealType* host_yi = &YL[ i * (restarting + 1) ]; RealType* host_yi = &YL[ i * (restarting_max + 1) ]; RealType host_z[ i + 1 ]; if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting + 1 ) || if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ) || ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ) ) { std::cerr << "Failed to copy part of device vectors y_i or z to host buffer." << std::endl; Loading Loading @@ -480,7 +524,7 @@ hauseholder_cwy( DeviceVector& v, if( std::is_same< DeviceType, Devices::Cuda >::value ) { // the upper (m+1)x(m+1) submatrix of Y is duplicated on host for fast access for( int k = 0; k <= i; k++ ) aux[ k ] = YL[ i + k * (restarting + 1) ]; aux[ k ] = YL[ i + k * (restarting_max + 1) ]; } // aux = T_i * aux Loading @@ -488,7 +532,7 @@ hauseholder_cwy( DeviceVector& v, for( int k = 0; k <= i; k++ ) { RealType aux2 = 0.0; for( int j = k; j <= i; j++ ) aux2 += T[ k + j * (restarting + 1) ] * aux[ j ]; aux2 += T[ k + j * (restarting_max + 1) ] * aux[ j ]; aux[ k ] = aux2; } Loading Loading @@ -530,7 +574,7 @@ hauseholder_cwy_transposed( DeviceVector& z, for( int k = i; k >= 0; k-- ) { RealType aux2 = 0.0; for( int j = 0; j <= k; j++ ) aux2 += T[ j + k * (restarting + 1) ] * aux[ j ]; aux2 += T[ j + k * (restarting_max + 1) ] * aux[ j ]; aux[ k ] = aux2; } Loading Loading @@ -631,7 +675,7 @@ template< typename Matrix, typename Preconditioner > bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m ) { if( size == _size && restarting == m ) return true; if( size == _size && restarting_max == m ) return true; size = _size; if( std::is_same< DeviceType, Devices::Cuda >::value ) // align each column to 256 bytes - optimal for CUDA Loading @@ -639,18 +683,18 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m else // on the host, we add 1 to disrupt the cache false-sharing pattern ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1; restarting = m; restarting_max = m; if( ! r.setSize( size ) || ! z.setSize( size ) || ! w.setSize( size ) || ! V.setSize( ldSize * ( restarting + 1 ) ) || ! Y.setSize( ldSize * ( restarting + 1 ) ) || ! T.setSize( (restarting + 1) * (restarting + 1) ) || ! YL.setSize( (restarting + 1) * (restarting + 1) ) || ! cs.setSize( restarting + 1 ) || ! sn.setSize( restarting + 1 ) || ! H.setSize( ( restarting + 1 ) * restarting ) || ! s.setSize( restarting + 1 ) || ! V.setSize( ldSize * ( m + 1 ) ) || ! Y.setSize( ldSize * ( m + 1 ) ) || ! T.setSize( (m + 1) * (m + 1) ) || ! YL.setSize( (m + 1) * (m + 1) ) || ! cs.setSize( m + 1 ) || ! sn.setSize( m + 1 ) || ! H.setSize( ( m + 1 ) * m ) || ! s.setSize( m + 1 ) || ! _M_tmp.setSize( size ) ) { std::cerr << "I could not allocate all supporting arrays for the CWYGMRES solver." << std::endl; Loading
src/TNL/Solvers/Linear/GMRES.h +1 −1 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ protected: Containers::Vector< RealType, DeviceType, IndexType > _r, w, _v, _M_tmp; Containers::Vector< RealType, Devices::Host, IndexType > _s, _cs, _sn, _H; IndexType size, restarting; IndexType size, restarting_min, restarting_max, restarting_step_min, restarting_step_max; MatrixPointer matrix; Loading
src/TNL/Solvers/Linear/GMRES_impl.h +61 −16 Original line number Diff line number Diff line Loading @@ -21,7 +21,10 @@ template< typename Matrix, GMRES< Matrix, Preconditioner >:: GMRES() : size( 0 ), restarting( 10 ) restarting_min( 10 ), restarting_max( 10 ), restarting_step_min( 3 ), restarting_step_max( 3 ) { /**** * Clearing the shared pointer means that there is no Loading Loading @@ -56,7 +59,10 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { //IterativeSolver< RealType, IndexType >::configSetup( config, prefix ); config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 ); config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 ); } template< typename Matrix, Loading @@ -67,7 +73,10 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { IterativeSolver< RealType, IndexType >::setup( parameters, prefix ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) ); restarting_min = parameters.getParameter< int >( "gmres-restarting-min" ); this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) ); restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" ); restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" ); return true; } Loading @@ -79,7 +88,7 @@ setRestarting( IndexType rest ) { if( size != 0 ) setSize( size, rest ); restarting = rest; restarting_max = rest; } template< typename Matrix, Loading Loading @@ -108,13 +117,19 @@ GMRES< Matrix, Preconditioner >:: solve( const Vector& b, Vector& x ) { TNL_ASSERT( matrix, std::cerr << "No matrix was set in GMRES. Call setMatrix() before solve()." << std::endl ); if( restarting <= 0 ) if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max ) { std::cerr << "I have wrong value for the restarting of the GMRES solver. It is set to " << restarting << ". Please set some positive value using the SetRestarting method." << std::endl; std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min << ", r_max = " << restarting_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting ) ) if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max ) { std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min << ", d_max = " << restarting_step_max << std::endl; return false; } if( ! setSize( matrix -> getRows(), restarting_max ) ) { std::cerr << "I am not able to allocate enough memory for the GMRES solver. You may try to decrease the restarting parameter." << std::endl; return false; Loading Loading @@ -161,10 +176,36 @@ solve( const Vector& b, Vector& x ) this->resetIterations(); this->setResidue( beta / normb ); // parameters for the adaptivity of the restarting parameter RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate) const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99 const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175 int restart_cycles = 0; // counter of restart cycles int m = restarting_max; // current restarting parameter Containers::Vector< RealType, DeviceType, IndexType > vi, vk; while( this->checkNextIteration() ) { const IndexType m = restarting; // adaptivity of the restarting parameter if( restarting_max > restarting_min && restart_cycles > 0 ) { if( beta_ratio > max_beta_ratio ) // near stagnation -> set maximum m = restarting_max; else if( beta_ratio >= min_beta_ratio ) { // the step size is determined based on current m using linear interpolation // between restarting_step_min and restarting_step_max const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) / ( restarting_max - restarting_min ) * ( m - restarting_min ); if( m - step >= restarting_min ) m -= step; else // set restarting_max when we hit restarting_min (see Baker et al. (2009)) m = restarting_max; } // std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl; } for( IndexType i = 0; i < m + 1; i ++ ) H[ i ] = s[ i ] = cs[ i ] = sn[ i ] = 0.0; Loading Loading @@ -279,6 +320,7 @@ solve( const Vector& b, Vector& x ) /**** * r = M.solve(b - A * x); */ const RealType beta_old = beta; beta = 0.0; if( preconditioner ) { Loading @@ -299,6 +341,9 @@ solve( const Vector& b, Vector& x ) //cout << " beta = " << beta << std::endl; //cout << "residue = " << beta / normb << std::endl; // update parameters for the adaptivity of the restarting parameter ++restart_cycles; beta_ratio = beta / beta_old; } this->refreshSolverMonitor( true ); return this->checkConvergence(); Loading Loading @@ -389,16 +434,16 @@ bool GMRES< Matrix, Preconditioner >:: setSize( IndexType _size, IndexType m ) { if( size == _size && restarting == m ) return true; if( size == _size && restarting_max == m ) return true; size = _size; restarting = m; restarting_max = m; if( ! _r.setSize( size ) || ! w.setSize( size ) || ! _s.setSize( restarting + 1 ) || ! _cs.setSize( restarting + 1 ) || ! _sn.setSize( restarting + 1 ) || ! _v.setSize( size * ( restarting + 1 ) ) || ! _H.setSize( ( restarting + 1 ) * restarting ) || ! _s.setSize( m + 1 ) || ! _cs.setSize( m + 1 ) || ! _sn.setSize( m + 1 ) || ! _v.setSize( size * ( m + 1 ) ) || ! _H.setSize( ( m + 1 ) * m ) || ! _M_tmp.setSize( size ) ) { std::cerr << "I could not allocate all supporting arrays for the GMRES solver." << std::endl; Loading
src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h +6 −2 Original line number Diff line number Diff line Loading @@ -141,8 +141,12 @@ writeProlog( Logger& logger, logger.writeParameter< double >( "Adaptivity:", "merson-adaptivity", parameters, 1 ); if( solverName == "sor" ) logger.writeParameter< double >( "Omega:", "sor-omega", parameters, 1 ); if( solverName == "gmres" ) logger.writeParameter< int >( "Restarting:", "gmres-restarting", parameters, 1 ); if( solverName == "gmres" || solverName == "cwygmres" ) { logger.writeParameter< int >( "Restarting min:", "gmres-restarting-min", parameters, 1 ); logger.writeParameter< int >( "Restarting max:", "gmres-restarting-max", parameters, 1 ); logger.writeParameter< int >( "Restarting step min:", "gmres-restarting-step-min", parameters, 1 ); logger.writeParameter< int >( "Restarting step max:", "gmres-restarting-step-max", parameters, 1 ); } logger.writeParameter< double >( "Convergence residue:", "convergence-residue", parameters ); logger.writeParameter< double >( "Divergence residue:", "divergence-residue", parameters ); logger.writeParameter< int >( "Maximal number of iterations:", "max-iterations", parameters ); Loading