diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h index f1a4b87328a630411c634bed6744494f76afea02..e1c02f0ab5eadbaea7ba2c2678641b0c1a05ee6b 100644 --- a/src/TNL/Solvers/Linear/GMRES.h +++ b/src/TNL/Solvers/Linear/GMRES.h @@ -53,7 +53,16 @@ protected: using HostView = typename DeviceView::template Self< RealType, Devices::Host >; using HostVector = typename DeviceVector::template Self< RealType, Devices::Host >;; - enum class Variant { MGS, MGSR, CWY }; + enum class Variant { CGS, CGSR, MGS, MGSR, CWY }; + +// nvcc allows __cuda_callable__ lambdas only in public methods +#ifdef __NVCC__ +public: +#endif + int orthogonalize_CGS( const int m, const RealType normb, const RealType beta ); +#ifdef __NVCC__ +protected: +#endif int orthogonalize_MGS( const int m, const RealType normb, const RealType beta ); diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index d6cb8fdd095120c35cbf303a734ed7c5667bb79d..8e653e28b2d783e4a342f899eb794554d11c131a 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -31,6 +31,8 @@ configSetup( Config::ConfigDescription& config, const String& prefix ) { config.addEntry< String >( prefix + "gmres-variant", "Minimal number of iterations after which the GMRES restarts.", "CWY" ); + config.addEntryEnum( "CGS" ); + config.addEntryEnum( "CGSR" ); config.addEntryEnum( "MGS" ); config.addEntryEnum( "MGSR" ); config.addEntryEnum( "CWY" ); @@ -47,7 +49,11 @@ setup( const Config::ParameterContainer& parameters, const String& prefix ) { const String var = parameters.getParameter< String >( prefix + "gmres-variant" ); - if( var == "MGS" ) + if( var == "CGS" ) + variant = Variant::CGS; + else if( var == "CGSR" ) + variant = Variant::CGSR; + else if( var == "MGS" ) variant = Variant::MGS; else if( var == "MGSR" ) variant = Variant::MGSR; @@ -133,6 +139,10 @@ solve( ConstVectorViewType b, VectorViewType x ) // orthogonalization int o_steps = 0; switch( variant ) { + case Variant::CGS: + case Variant::CGSR: + o_steps = orthogonalize_CGS( m, normb, beta ); + break; case Variant::MGS: case Variant::MGSR: o_steps = orthogonalize_MGS( m, normb, beta ); @@ -167,6 +177,102 @@ solve( ConstVectorViewType b, VectorViewType x ) return this->checkConvergence(); } +template< typename Matrix > +int +GMRES< Matrix >:: +orthogonalize_CGS( const int m, const RealType normb, const RealType beta ) +{ + // initial binding to _M_tmp sets the correct local range, global size and + // communication group for distributed views + VectorViewType v_i( _M_tmp.getView() ); + VectorViewType v_k( _M_tmp.getView() ); + + /*** + * v_0 = r / | r | = 1.0 / beta * r + */ + v_i.bind( V.getData(), size ); + v_i = (1.0 / beta) * r; + + H.setValue( 0.0 ); + s.setValue( 0.0 ); + s[ 0 ] = beta; + + /**** + * Starting m-loop + */ + for( int i = 0; i < m && this->nextIteration(); i++ ) { + v_i.bind( &V.getData()[ i * ldSize ], size ); + /**** + * Solve w from M w = A v_i + */ + preconditioned_matvec( w, v_i ); + + for( int k = 0; k <= i; k++ ) + H[ k + i * (m + 1) ] = 0.0; + const int reorthogonalize = (variant == Variant::CGSR) ? 2 : 1; + for( int l = 0; l < reorthogonalize; l++ ) { + // auxiliary array for the H coefficients of the current l-loop + RealType H_l[i + 1]; + + // CGS part 1: compute projection coefficients +// for( int k = 0; k <= i; k++ ) { +// v_k.bind( &V.getData()[ k * ldSize ], size ); +// H_l[k] = (w, v_k); +// H[ k + i * (m + 1) ] += H_l[k]; +// } + // H_l = V_i^T * w + const RealType* _V = V.getData(); + const RealType* _w = Traits::getConstLocalView( w ).getData(); + const IndexType ldSize = this->ldSize; + auto fetch = [_V, _w, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _V[ idx + k * ldSize ] * _w[ idx ]; }; + Algorithms::Multireduction< DeviceType >::reduce + ( (RealType) 0, + fetch, + std::plus<>{}, + size, + i + 1, + H_l ); + for( int k = 0; k <= i; k++ ) + H[ k + i * (m + 1) ] += H_l[k]; + + // CGS part 2: subtract the projections +// for( int k = 0; k <= i; k++ ) { +// v_k.bind( &V.getData()[ k * ldSize ], size ); +// w = w - H_l[k] * v_k; +// } + // w := w - V_i * H_l + Matrices::MatrixOperations< DeviceType >:: + gemv( size, i + 1, + -1.0, V.getData(), ldSize, H_l, + 1.0, Traits::getLocalView( w ).getData() ); + } + /*** + * H_{i+1,i} = |w| + */ + RealType normw = lpNorm( w, 2.0 ); + H[ i + 1 + i * (m + 1) ] = normw; + + /*** + * v_{i+1} = w / |w| + */ + v_i.bind( &V.getData()[ ( i + 1 ) * ldSize ], size ); + v_i = (1.0 / normw) * w; + + /**** + * Applying the Givens rotations G_0, ..., G_i + */ + apply_givens_rotations( i, m ); + + this->setResidue( std::fabs( s[ i + 1 ] ) / normb ); + if( ! this->checkNextIteration() ) + return i; + else + this->refreshSolverMonitor(); + } + + return m; +} + template< typename Matrix > int GMRES< Matrix >:: @@ -198,13 +304,13 @@ orthogonalize_MGS( const int m, const RealType normb, const RealType beta ) preconditioned_matvec( w, v_i ); for( int k = 0; k <= i; k++ ) - H[ k + i * ( m + 1 ) ] = 0.0; + H[ k + i * (m + 1) ] = 0.0; const int reorthogonalize = (variant == Variant::MGSR) ? 2 : 1; for( int l = 0; l < reorthogonalize; l++ ) for( int k = 0; k <= i; k++ ) { v_k.bind( &V.getData()[ k * ldSize ], size ); /*** - * H_{k,i} = ( w, v_k ) + * H_{k,i} = (w, v_k) */ RealType H_k_i = (w, v_k); H[ k + i * (m + 1) ] += H_k_i;