Adaptivity of the GMRES restarting parameter (7e7b254c) · Commits · TNL / tnl-dev

src/TNL/Solvers/Linear/CWYGMRES.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -119,7 +119,7 @@ protected:
		// host-only storage for Givens rotations and the least squares problem
		HostVector cs, sn, H, s;

		IndexType size, ldSize, restarting;
		IndexType size, ldSize, restarting_min, restarting_max, restarting_step_min, restarting_step_max;

		MatrixPointer matrix;
		PreconditionerPointer preconditioner;

src/TNL/Solvers/Linear/CWYGMRES_impl.h

+72 −28

Original line number	Diff line number	Diff line
		@@ -29,7 +29,10 @@ CWYGMRES< Matrix, Preconditioner >::
		CWYGMRES()
		: size( 0 ),
		ldSize( 0 ),
		restarting( 10 )
		restarting_min( 10 ),
		restarting_max( 10 ),
		restarting_step_min( 3 ),
		restarting_step_max( 3 )
		{
		/****
		* Clearing the shared pointer means that there is no
		@@ -64,7 +67,10 @@ configSetup( Config::ConfigDescription& config,
		const String& prefix )
		{
		//IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
		config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the CWYGMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
		config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
		}

		template< typename Matrix,
		@@ -75,7 +81,10 @@ setup( const Config::ParameterContainer& parameters,
		const String& prefix )
		{
		IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
		this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) );
		restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
		this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
		restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
		restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
		return true;
		}

		@@ -87,7 +96,7 @@ setRestarting( IndexType rest )
		{
		if( size != 0 )
		setSize( size, rest );
		restarting = rest;
		restarting_max = rest;
		}

		template< typename Matrix,
		@@ -116,13 +125,19 @@ CWYGMRES< Matrix, Preconditioner >::
		solve( const Vector& b, Vector& x )
		{
		TNL_ASSERT( matrix, std::cerr << "No matrix was set in CWYGMRES. Call setMatrix() before solve()." << std::endl );
		if( restarting <= 0 )
		if( restarting_min <= 0 \|\| restarting_max <= 0 \|\| restarting_min > restarting_max )
		{
		std::cerr << "I have wrong value for the restarting of the CWYGMRES solver. It is set to " << restarting
		<< ". Please set some positive value using the SetRestarting method." << std::endl;
		std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
		<< ", r_max = " << restarting_max << std::endl;
		return false;
		}
		if( ! setSize( matrix -> getRows(), restarting ) )
		if( restarting_step_min < 0 \|\| restarting_step_max < 0 \|\| restarting_step_min > restarting_step_max )
		{
		std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min
		<< ", d_max = " << restarting_step_max << std::endl;
		return false;
		}
		if( ! setSize( matrix -> getRows(), restarting_max ) )
		{
		std::cerr << "I am not able to allocate enough memory for the CWYGMRES solver. You may try to decrease the restarting parameter." << std::endl;
		return false;
		@@ -160,10 +175,35 @@ solve( const Vector& b, Vector& x )
		this->resetIterations();
		this->setResidue( beta / normb );

		// parameters for the adaptivity of the restarting parameter
		RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate)
		const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99
		const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175
		int restart_cycles = 0; // counter of restart cycles
		int m = restarting_max; // current restarting parameter

		DeviceVector vi, vk;
		while( this->checkNextIteration() )
		{
		const IndexType m = restarting;
		// adaptivity of the restarting parameter
		if( restart_cycles > 0 ) {
		if( beta_ratio > max_beta_ratio )
		// near stagnation -> set maximum
		m = restarting_max;
		else if( beta_ratio >= min_beta_ratio ) {
		// the step size is determined based on current m using linear interpolation
		// between restarting_step_min and restarting_step_max
		const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) /
		( restarting_max - restarting_min ) *
		( m - restarting_min );
		if( m - step >= restarting_min )
		m -= step;
		else
		// set restarting_max when we hit restarting_min (see Baker et al. (2009))
		m = restarting_max;
		}
		// std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl;
		}

		/***
		* z = r / \| r \| = 1.0 / beta * r
		@@ -295,6 +335,7 @@ solve( const Vector& b, Vector& x )
		/****
		* r = M.solve(b - A * x);
		*/
		const RealType beta_old = beta;
		if( preconditioner )
		{
		matrix->vectorProduct( x, _M_tmp );
		@@ -313,6 +354,9 @@ solve( const Vector& b, Vector& x )
		// cout << " beta = " << beta << endl;
		// cout << "residue = " << beta / normb << endl;

		// update parameters for the adaptivity of the restarting parameter
		++restart_cycles;
		beta_ratio = beta / beta_old;
		}
		this->refreshSolverMonitor( true );
		return this->checkConvergence();
		@@ -401,7 +445,7 @@ hauseholder_generate( DeviceVector& Y,
		// assuming it's stable enough...
		const RealType t_i = 2.0 / (norm_yi * norm_yi);

		T[ i + i * (restarting + 1) ] = t_i;
		T[ i + i * (restarting_max + 1) ] = t_i;
		if( i > 0 ) {
		// aux = Y_{i-1}^T * y_i
		RealType aux[ i ];
		@@ -421,9 +465,9 @@ hauseholder_generate( DeviceVector& Y,

		// [T_i]_{0..i-1} = - T_{i-1} * t_i * aux
		for( int k = 0; k < i; k++ ) {
		T[ k + i * (restarting + 1) ] = 0.0;
		T[ k + i * (restarting_max + 1) ] = 0.0;
		for( int j = k; j < i; j++ )
		T[ k + i * (restarting + 1) ] -= T[ k + j * (restarting + 1) ] * (t_i * aux[ j ]);
		T[ k + i * (restarting_max + 1) ] -= T[ k + j * (restarting_max + 1) ] * (t_i * aux[ j ]);
		}
		}
		}
		@@ -441,7 +485,7 @@ hauseholder_apply_trunc( HostVector& out,
		DeviceVector y_i;
		y_i.bind( &Y.getData()[ i * ldSize ], size );

		const RealType aux = T[ i + i * (restarting + 1) ] * y_i.scalarProduct( z );
		const RealType aux = T[ i + i * (restarting_max + 1) ] * y_i.scalarProduct( z );
		if( std::is_same< DeviceType, Devices::Host >::value ) {
		for( int k = 0; k <= i; k++ )
		out[ k ] = z[ k ] - y_i[ k ] * aux;
		@@ -449,9 +493,9 @@ hauseholder_apply_trunc( HostVector& out,
		if( std::is_same< DeviceType, Devices::Cuda >::value ) {
		// copy part of y_i to buffer on host
		// here we duplicate the upper (m+1)x(m+1) submatrix of Y on host for fast access
		RealType* host_yi = &YL[ i * (restarting + 1) ];
		RealType* host_yi = &YL[ i * (restarting_max + 1) ];
		RealType host_z[ i + 1 ];
		if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting + 1 ) \|\|
		if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ) \|\|
		! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ) )
		{
		std::cerr << "Failed to copy part of device vectors y_i or z to host buffer." << std::endl;
		@@ -480,7 +524,7 @@ hauseholder_cwy( DeviceVector& v,
		if( std::is_same< DeviceType, Devices::Cuda >::value ) {
		// the upper (m+1)x(m+1) submatrix of Y is duplicated on host for fast access
		for( int k = 0; k <= i; k++ )
		aux[ k ] = YL[ i + k * (restarting + 1) ];
		aux[ k ] = YL[ i + k * (restarting_max + 1) ];
		}

		// aux = T_i * aux
		@@ -488,7 +532,7 @@ hauseholder_cwy( DeviceVector& v,
		for( int k = 0; k <= i; k++ ) {
		RealType aux2 = 0.0;
		for( int j = k; j <= i; j++ )
		aux2 += T[ k + j * (restarting + 1) ] * aux[ j ];
		aux2 += T[ k + j * (restarting_max + 1) ] * aux[ j ];
		aux[ k ] = aux2;
		}

		@@ -530,7 +574,7 @@ hauseholder_cwy_transposed( DeviceVector& z,
		for( int k = i; k >= 0; k-- ) {
		RealType aux2 = 0.0;
		for( int j = 0; j <= k; j++ )
		aux2 += T[ j + k * (restarting + 1) ] * aux[ j ];
		aux2 += T[ j + k * (restarting_max + 1) ] * aux[ j ];
		aux[ k ] = aux2;
		}

		@@ -631,7 +675,7 @@ template< typename Matrix,
		typename Preconditioner >
		bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m )
		{
		if( size == _size && restarting == m ) return true;
		if( size == _size && restarting_max == m ) return true;
		size = _size;
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		// align each column to 256 bytes - optimal for CUDA
		@@ -639,18 +683,18 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m
		else
		// on the host, we add 1 to disrupt the cache false-sharing pattern
		ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1;
		restarting = m;
		restarting_max = m;
		if( ! r.setSize( size ) \|\|
		! z.setSize( size ) \|\|
		! w.setSize( size ) \|\|
		! V.setSize( ldSize * ( restarting + 1 ) ) \|\|
		! Y.setSize( ldSize * ( restarting + 1 ) ) \|\|
		! T.setSize( (restarting + 1) * (restarting + 1) ) \|\|
		! YL.setSize( (restarting + 1) * (restarting + 1) ) \|\|
		! cs.setSize( restarting + 1 ) \|\|
		! sn.setSize( restarting + 1 ) \|\|
		! H.setSize( ( restarting + 1 ) * restarting ) \|\|
		! s.setSize( restarting + 1 ) \|\|
		! V.setSize( ldSize * ( m + 1 ) ) \|\|
		! Y.setSize( ldSize * ( m + 1 ) ) \|\|
		! T.setSize( (m + 1) * (m + 1) ) \|\|
		! YL.setSize( (m + 1) * (m + 1) ) \|\|
		! cs.setSize( m + 1 ) \|\|
		! sn.setSize( m + 1 ) \|\|
		! H.setSize( ( m + 1 ) * m ) \|\|
		! s.setSize( m + 1 ) \|\|
		! _M_tmp.setSize( size ) )
		{
		std::cerr << "I could not allocate all supporting arrays for the CWYGMRES solver." << std::endl;

src/TNL/Solvers/Linear/GMRES.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -87,7 +87,7 @@ protected:
		Containers::Vector< RealType, DeviceType, IndexType > _r, w, _v, _M_tmp;
		Containers::Vector< RealType, Devices::Host, IndexType > _s, _cs, _sn, _H;

		IndexType size, restarting;
		IndexType size, restarting_min, restarting_max, restarting_step_min, restarting_step_max;

		MatrixPointer matrix;

src/TNL/Solvers/Linear/GMRES_impl.h

+61 −16

Original line number	Diff line number	Diff line
		@@ -21,7 +21,10 @@ template< typename Matrix,
		GMRES< Matrix, Preconditioner >::
		GMRES()
		: size( 0 ),
		restarting( 10 )
		restarting_min( 10 ),
		restarting_max( 10 ),
		restarting_step_min( 3 ),
		restarting_step_max( 3 )
		{
		/****
		* Clearing the shared pointer means that there is no
		@@ -56,7 +59,10 @@ configSetup( Config::ConfigDescription& config,
		const String& prefix )
		{
		//IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
		config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the GMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
		config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
		config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
		}

		template< typename Matrix,
		@@ -67,7 +73,10 @@ setup( const Config::ParameterContainer& parameters,
		const String& prefix )
		{
		IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
		this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) );
		restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
		this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
		restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
		restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
		return true;
		}

		@@ -79,7 +88,7 @@ setRestarting( IndexType rest )
		{
		if( size != 0 )
		setSize( size, rest );
		restarting = rest;
		restarting_max = rest;
		}

		template< typename Matrix,
		@@ -108,13 +117,19 @@ GMRES< Matrix, Preconditioner >::
		solve( const Vector& b, Vector& x )
		{
		TNL_ASSERT( matrix, std::cerr << "No matrix was set in GMRES. Call setMatrix() before solve()." << std::endl );
		if( restarting <= 0 )
		if( restarting_min <= 0 \|\| restarting_max <= 0 \|\| restarting_min > restarting_max )
		{
		std::cerr << "I have wrong value for the restarting of the GMRES solver. It is set to " << restarting
		<< ". Please set some positive value using the SetRestarting method." << std::endl;
		std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
		<< ", r_max = " << restarting_max << std::endl;
		return false;
		}
		if( ! setSize( matrix -> getRows(), restarting ) )
		if( restarting_step_min < 0 \|\| restarting_step_max < 0 \|\| restarting_step_min > restarting_step_max )
		{
		std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min
		<< ", d_max = " << restarting_step_max << std::endl;
		return false;
		}
		if( ! setSize( matrix -> getRows(), restarting_max ) )
		{
		std::cerr << "I am not able to allocate enough memory for the GMRES solver. You may try to decrease the restarting parameter." << std::endl;
		return false;
		@@ -161,10 +176,36 @@ solve( const Vector& b, Vector& x )
		this->resetIterations();
		this->setResidue( beta / normb );

		// parameters for the adaptivity of the restarting parameter
		RealType beta_ratio = 1; // = beta / beta_ratio (small value indicates good convergence rate)
		const RealType max_beta_ratio = 0.99; // = cos(8°) \approx 0.99
		const RealType min_beta_ratio = 0.175; // = cos(80°) \approx 0.175
		int restart_cycles = 0; // counter of restart cycles
		int m = restarting_max; // current restarting parameter

		Containers::Vector< RealType, DeviceType, IndexType > vi, vk;
		while( this->checkNextIteration() )
		{
		const IndexType m = restarting;
		// adaptivity of the restarting parameter
		if( restarting_max > restarting_min && restart_cycles > 0 ) {
		if( beta_ratio > max_beta_ratio )
		// near stagnation -> set maximum
		m = restarting_max;
		else if( beta_ratio >= min_beta_ratio ) {
		// the step size is determined based on current m using linear interpolation
		// between restarting_step_min and restarting_step_max
		const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) /
		( restarting_max - restarting_min ) *
		( m - restarting_min );
		if( m - step >= restarting_min )
		m -= step;
		else
		// set restarting_max when we hit restarting_min (see Baker et al. (2009))
		m = restarting_max;
		}
		// std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << " " << std::endl;
		}

		for( IndexType i = 0; i < m + 1; i ++ )
		H[ i ] = s[ i ] = cs[ i ] = sn[ i ] = 0.0;

		@@ -279,6 +320,7 @@ solve( const Vector& b, Vector& x )
		/****
		* r = M.solve(b - A * x);
		*/
		const RealType beta_old = beta;
		beta = 0.0;
		if( preconditioner )
		{
		@@ -299,6 +341,9 @@ solve( const Vector& b, Vector& x )
		//cout << " beta = " << beta << std::endl;
		//cout << "residue = " << beta / normb << std::endl;

		// update parameters for the adaptivity of the restarting parameter
		++restart_cycles;
		beta_ratio = beta / beta_old;
		}
		this->refreshSolverMonitor( true );
		return this->checkConvergence();
		@@ -389,16 +434,16 @@ bool
		GMRES< Matrix, Preconditioner >::
		setSize( IndexType _size, IndexType m )
		{
		if( size == _size && restarting == m ) return true;
		if( size == _size && restarting_max == m ) return true;
		size = _size;
		restarting = m;
		restarting_max = m;
		if( ! _r.setSize( size ) \|\|
		! w.setSize( size ) \|\|
		! _s.setSize( restarting + 1 ) \|\|
		! _cs.setSize( restarting + 1 ) \|\|
		! _sn.setSize( restarting + 1 ) \|\|
		! _v.setSize( size * ( restarting + 1 ) ) \|\|
		! _H.setSize( ( restarting + 1 ) * restarting ) \|\|
		! _s.setSize( m + 1 ) \|\|
		! _cs.setSize( m + 1 ) \|\|
		! _sn.setSize( m + 1 ) \|\|
		! _v.setSize( size * ( m + 1 ) ) \|\|
		! _H.setSize( ( m + 1 ) * m ) \|\|
		! _M_tmp.setSize( size ) )
		{
		std::cerr << "I could not allocate all supporting arrays for the GMRES solver." << std::endl;

src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h

+6 −2

Original line number	Diff line number	Diff line
		@@ -141,8 +141,12 @@ writeProlog( Logger& logger,
		logger.writeParameter< double >( "Adaptivity:", "merson-adaptivity", parameters, 1 );
		if( solverName == "sor" )
		logger.writeParameter< double >( "Omega:", "sor-omega", parameters, 1 );
		if( solverName == "gmres" )
		logger.writeParameter< int >( "Restarting:", "gmres-restarting", parameters, 1 );
		if( solverName == "gmres" \|\| solverName == "cwygmres" ) {
		logger.writeParameter< int >( "Restarting min:", "gmres-restarting-min", parameters, 1 );
		logger.writeParameter< int >( "Restarting max:", "gmres-restarting-max", parameters, 1 );
		logger.writeParameter< int >( "Restarting step min:", "gmres-restarting-step-min", parameters, 1 );
		logger.writeParameter< int >( "Restarting step max:", "gmres-restarting-step-max", parameters, 1 );
		}
		logger.writeParameter< double >( "Convergence residue:", "convergence-residue", parameters );
		logger.writeParameter< double >( "Divergence residue:", "divergence-residue", parameters );
		logger.writeParameter< int >( "Maximal number of iterations:", "max-iterations", parameters );