Remove code duplication between backtracking algorithms (2eb2ffbf) · Commits · Jakub Klinkovský / TNL gradient descent

include/gdcpp.h

+68 −128

Original line number	Original line	Diff line number	Diff line
	@@ -382,29 +382,25 @@ namespace gdc
	}		}
	};		};

	/** Step size functor to perform Wolfe Linesearch with backtracking.		/** Step size functor to perform Armijo Linesearch with backtracking.
	* The functor iteratively decreases the step size until the following		* The functor iteratively decreases the step size until the following
	* conditions are met:		* conditions are met:
	*		*
	* Armijo: f(x - stepSize * grad(x)) <= f(x) - c1 * stepSize * grad(x)^T * grad(x)		* Armijo: f(x - stepSize * grad(x)) <= f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
	* Wolfe: grad(x)^T grad(x - stepSize * grad(x)) <= c2 * grad(x)^T * grad(x)
	*		*
	* If either condition does not hold the step size is decreased:		* If either condition does not hold the step size is decreased:
	*		*
	* stepSize = decrease * stepSize		* stepSize = decrease * stepSize */
	*
	*/
	template<typename Scalar>		template<typename Scalar>
	class WolfeBacktracking		class ArmijoBacktracking
	{		{
	public:		public:
	typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;		typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
	typedef std::function<Scalar(const Vector &, Vector &)> Objective;		typedef std::function<Scalar(const Vector &, Vector &)> Objective;
	typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;		typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;
	private:		protected:
	Scalar decrease_;		Scalar decrease_;
	Scalar c1_;		Scalar cArmijo_;
	Scalar c2_;
	Scalar minStep_;		Scalar minStep_;
	Scalar maxStep_;		Scalar maxStep_;
	Index maxIt_;		Index maxIt_;
	@@ -419,40 +415,54 @@ namespace gdc
	finiteDifferences_(xval, fval, gradient);		finiteDifferences_(xval, fval, gradient);
	return fval;		return fval;
	}		}

			virtual bool computeSecondCondition(const Scalar,
			const Scalar,
			const Scalar,
			const Vector &,
			const Vector &)
			{
			return true;
			}
	public:		public:
	WolfeBacktracking()		ArmijoBacktracking()
	: WolfeBacktracking(0.8, 1e-4, 0.9, 1e-12, 1.0, 0)		: ArmijoBacktracking(0.8, 1e-4, 1e-12, 1.0, 0)
	{ }		{ }

	WolfeBacktracking(const Scalar decrease,		ArmijoBacktracking(const Scalar decrease,
	const Scalar c1,		const Scalar cArmijo,
	const Scalar c2,
	const Scalar minStep,		const Scalar minStep,
	const Scalar maxStep,		const Scalar maxStep,
	const Index iterations)		const Index iterations)
	: decrease_(decrease), c1_(c1), c2_(c2), minStep_(minStep),		: decrease_(decrease), cArmijo_(cArmijo), minStep_(minStep),
	maxStep_(maxStep), maxIt_(iterations), objective_()		maxStep_(maxStep), maxIt_(iterations), objective_()
	{ }		{
			assert(decrease > 0);
			assert(decrease < 1);
			assert(cArmijo > 0);
			assert(cArmijo < 0.5);
			assert(minStep < maxStep);
			}

	/** Set the decreasing factor for backtracking.		/** Set the decreasing factor for backtracking.
	* Assure that decrease in (0, 1).		* Assure that decrease in (0, 1).
	* @param decrease decreasing factor */		* @param decrease decreasing factor */
	void setBacktrackingDecrease(const Scalar decrease)		void setBacktrackingDecrease(const Scalar decrease)
	{		{
			assert(decrease > 0);
			assert(decrease < 1);
	decrease_ = decrease;		decrease_ = decrease;
	}		}

	/** Set the wolfe constants for Armijo and Wolfe condition (see class		/** Set the relaxation constant for the Armijo condition (see class
	* description).		* description).
	* Assure that c1 < c2 < 1 and c1 in (0, 0.5).		* Assure cArmijo in (0, 0.5).
	* @param c1 armijo constant		* @param cArmijo armijo constant */
	* @param c2 wolfe constant */		void setArmijoConstant(const Scalar cArmijo)
	void setWolfeConstants(const Scalar c1, const Scalar c2)
	{		{
	assert(c1 < c2);		assert(cArmijo > 0);
	assert(c2 < 1);		assert(cArmijo < 0.5);
	c1_ = c1;		cArmijo_ = cArmijo;
	c2_ = c2;
	}		}

	/** Set the bounds for the step size during linesearch.		/** Set the bounds for the step size during linesearch.
	@@ -495,21 +505,20 @@ namespace gdc
	Vector gradientN;		Vector gradientN;
	Vector xvalN;		Vector xvalN;
	Scalar fvalN;		Scalar fvalN;
	Scalar stepGrad = -gradient.dot(gradient);
	bool armijoCondition = false;		bool armijoCondition = false;
	bool wolfeCondition = false;		bool secondCondition = false;

	Index iterations = 0;		Index iterations = 0;
	while((maxIt_ <= 0 || iterations < maxIt_) &&		while((maxIt_ <= 0 || iterations < maxIt_) &&
	stepSize * decrease_ >= minStep_ &&		stepSize * decrease_ >= minStep_ &&
	!(armijoCondition && wolfeCondition))		!(armijoCondition && secondCondition))
	{		{
	stepSize = decrease_ * stepSize;		stepSize = decrease_ * stepSize;
	xvalN = xval - stepSize * gradient;		xvalN = xval - stepSize * gradient;
	fvalN = evaluateObjective(xvalN, gradientN);		fvalN = evaluateObjective(xvalN, gradientN);

	armijoCondition = fvalN <= fval + c1_ * stepSize * stepGrad;		armijoCondition = fvalN <= fval - cArmijo_ * stepSize * gradient.dot(gradient);
	wolfeCondition = -gradient.dot(gradientN) >= c2_ * stepGrad;		secondCondition = computeSecondCondition(stepSize, fval, fvalN, gradient, gradientN);

	++iterations;		++iterations;
	}		}
	@@ -518,130 +527,61 @@ namespace gdc
	}		}
	};		};

	/** Step size functor to perform Armijo Linesearch with backtracking.		/** Step size functor to perform Wolfe Linesearch with backtracking.
	* The functor iteratively decreases the step size until the following		* The functor iteratively decreases the step size until the following
	* conditions are met:		* conditions are met:
	*		*
	* Armijo: f(x - stepSize * grad(x)) <= f(x) - c1 * stepSize * grad(x)^T * grad(x)		* Armijo: f(x - stepSize * grad(x)) <= f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
			* Wolfe: grad(x)^T grad(x - stepSize * grad(x)) <= cWolfe * grad(x)^T * grad(x)
	*		*
	* If either condition does not hold the step size is decreased:		* If either condition does not hold the step size is decreased:
	*		*
	* stepSize = decrease * stepSize */		* stepSize = decrease * stepSize */
	template<typename Scalar>		template<typename Scalar>
	class ArmijoBacktracking		class WolfeBacktracking : public ArmijoBacktracking<Scalar>
	{		{
	public:		public:
	typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;		typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
	typedef std::function<Scalar(const Vector &, Vector &)> Objective;		typedef std::function<Scalar(const Vector &, Vector &)> Objective;
	typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;		typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;
	private:		protected:
	Scalar decrease_;		Scalar cWolfe_;
	Scalar relaxation_;
	Scalar minStep_;
	Scalar maxStep_;
	Index maxIt_;
	Objective objective_;
	FiniteDifferences finiteDifferences_;

	Scalar evaluateObjective(const Vector &xval, Vector &gradient)		virtual bool computeSecondCondition(const Scalar,
			const Scalar ,
			const Scalar ,
			const Vector &gradient,
			const Vector &gradientN)
	{		{
	gradient.resize(0);		return gradient.dot(gradientN) <= cWolfe_ * gradient.dot(gradient);
	Scalar fval = objective_(xval, gradient);
	if(gradient.size() == 0)
	finiteDifferences_(xval, fval, gradient);
	return fval;
	}		}
	public:		public:
	ArmijoBacktracking()		WolfeBacktracking()
	: ArmijoBacktracking(0.8, 1e-4, 1e-12, 1.0, 0)		: WolfeBacktracking(0.8, 1e-4, 0.9, 1e-12, 1.0, 0)
	{ }		{ }

	ArmijoBacktracking(const Scalar decrease,		WolfeBacktracking(const Scalar decrease,
	const Scalar relaxation,		const Scalar cArmijo,
			const Scalar cWolfe,
	const Scalar minStep,		const Scalar minStep,
	const Scalar maxStep,		const Scalar maxStep,
	const Index iterations)		const Index iterations)
	: decrease_(decrease), relaxation_(relaxation), minStep_(minStep),		: ArmijoBacktracking<Scalar>(decrease, cArmijo, minStep, maxStep,
	maxStep_(maxStep), maxIt_(iterations), objective_()		iterations),cWolfe_(cWolfe)
	{ }

	/** Set the decreasing factor for backtracking.
	* Assure that decrease in (0, 1).
	* @param decrease decreasing factor */
	void setBacktrackingDecrease(const Scalar decrease)
	{		{
	decrease_ = decrease;		assert(cWolfe < 1);
			assert(cArmijo < cWolfe);
	}		}

	/** Set the relaxation constant for the Armijo condition (see class		/** Set the wolfe constants for Armijo and Wolfe condition (see class
	* description).		* description).
	* Assure relaxation in (0, 0.5).		* Assure that c1 < c2 < 1 and c1 in (0, 0.5).
	* @param relaxation armijo constant */		* @param c1 armijo constant
	void setRelaxationConstant(const Scalar relaxation)		* @param c2 wolfe constant */
	{		void setWolfeConstant(const Scalar cWolfe)
	assert(relaxation > 0);
	assert(relaxation < 0.5);
	relaxation_ = relaxation;
	}

	/** Set the bounds for the step size during linesearch.
	* The final step size is guaranteed to be in [minStep, maxStep].
	* @param minStep minimum step size
	* @param maxStep maximum step size */
	void setStepBounds(const Scalar minStep, const Scalar maxStep)
	{
	assert(minStep < maxStep);
	minStep_ = minStep;
	maxStep_ = maxStep;
	}

	/** Set the maximum number of iterations.
	* Set to 0 or negative for infinite iterations.
	* @param iterations maximum number of iterations */
	void setMaxIterations(const Index iterations)
	{
	maxIt_ = iterations;
	}

	void setObjective(const Objective &objective)
	{
	objective_ = objective;
	}

	void setFiniteDifferences(const FiniteDifferences &finiteDifferences)
	{
	finiteDifferences_ = finiteDifferences;
	}

	Scalar operator()(const Vector &xval,
	const Scalar fval,
	const Vector &gradient)
	{
	assert(objective_);
	assert(finiteDifferences_);

	Scalar stepSize = maxStep_ / decrease_;
	Vector gradientN;
	Vector xvalN;
	Scalar fvalN;
	Scalar stepGrad = -gradient.dot(gradient);
	bool armijoCondition = false;

	Index iterations = 0;
	while((maxIt_ <= 0 || iterations < maxIt_) &&
	stepSize * decrease_ >= minStep_ &&
	!armijoCondition)
	{		{
	stepSize = decrease_ * stepSize;		assert(cWolfe < 1);
	xvalN = xval - stepSize * gradient;		cWolfe_ = cWolfe;
	fvalN = evaluateObjective(xvalN, gradientN);

	armijoCondition = fvalN <= fval + relaxation_ * stepSize * stepGrad;

	++iterations;
	}

	return stepSize;
	}		}
	};		};