Remove gdc::Index, make the vector a template parameter (04710f1f) · Commits · Jakub Klinkovský / TNL gradient descent

README.md

+3 −2

Original line number	Original line	Diff line number	Diff line
	@@ -54,7 +54,7 @@ struct Ackley
	int main()		int main()
	{		{
	// Define the vector type		// Define the vector type
	using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, gdc::Index>;		using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, int>;

	// Create optimizer object with Ackley functor as objective.		// Create optimizer object with Ackley functor as objective.
	//		//
	@@ -65,7 +65,8 @@ int main()
	// You can additionally specify a FiniteDifferences functor as template		// You can additionally specify a FiniteDifferences functor as template
	// parameter. There are Forward-, Backward- and CentralDifferences		// parameter. There are Forward-, Backward- and CentralDifferences
	// available. (Default is CentralDifferences)		// available. (Default is CentralDifferences)
	gdc::GradientDescent<double, Ackley, gdc::WolfeBacktracking<double>> optimizer;		using StepSize = gdc::WolfeBacktracking<Vector>;
			gdc::GradientDescent<Vector, Ackley, StepSize> optimizer;

	// Set number of iterations as stop criterion.		// Set number of iterations as stop criterion.
	// Set it to 0 or negative for infinite iterations (default is 0).		// Set it to 0 or negative for infinite iterations (default is 0).

examples/ackley.cpp

+3 −2

Original line number	Original line	Diff line number	Diff line
	@@ -25,7 +25,7 @@ struct Ackley
	int main()		int main()
	{		{
	// Define the vector type		// Define the vector type
	using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, gdc::Index>;		using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, int>;

	// Create optimizer object with Ackley functor as objective.		// Create optimizer object with Ackley functor as objective.
	//		//
	@@ -36,7 +36,8 @@ int main()
	// You can additionally specify a FiniteDifferences functor as template		// You can additionally specify a FiniteDifferences functor as template
	// parameter. There are Forward-, Backward- and CentralDifferences		// parameter. There are Forward-, Backward- and CentralDifferences
	// available. (Default is CentralDifferences)		// available. (Default is CentralDifferences)
	gdc::GradientDescent<double, Ackley, gdc::WolfeBacktracking<double>> optimizer;		using StepSize = gdc::WolfeBacktracking<Vector>;
			gdc::GradientDescent<Vector, Ackley, StepSize> optimizer;

	// Set number of iterations as stop criterion.		// Set number of iterations as stop criterion.
	// Set it to 0 or negative for infinite iterations (default is 0).		// Set it to 0 or negative for infinite iterations (default is 0).

examples/paraboloid.cpp

+4 −3

Original line number	Original line	Diff line number	Diff line
	@@ -20,7 +20,7 @@ struct Paraboloid
	int main()		int main()
	{		{
	// Define the vector type		// Define the vector type
	using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, gdc::Index>;		using Vector = TNL::Containers::Vector<double, TNL::Devices::Host, int>;

	// Create optimizer object with Paraboloid functor as objective.		// Create optimizer object with Paraboloid functor as objective.
	//		//
	@@ -31,7 +31,8 @@ int main()
	// You can additionally specify a FiniteDifferences functor as template		// You can additionally specify a FiniteDifferences functor as template
	// parameter. There are Forward-, Backward- and CentralDifferences		// parameter. There are Forward-, Backward- and CentralDifferences
	// available. (Default is CentralDifferences)		// available. (Default is CentralDifferences)
	gdc::GradientDescent<double, Paraboloid, gdc::ConstantStepSize<double>> optimizer;		using StepSize = gdc::ConstantStepSize<Vector>;
			gdc::GradientDescent<Vector, Paraboloid, StepSize> optimizer;

	// Set number of iterations as stop criterion.		// Set number of iterations as stop criterion.
	// Set it to 0 or negative for infinite iterations (default is 0).		// Set it to 0 or negative for infinite iterations (default is 0).
	@@ -48,7 +49,7 @@ int main()
	optimizer.setMinStepLength(1e-6);		optimizer.setMinStepLength(1e-6);

	// Set the parametrized StepSize functor used for the step calculation.		// Set the parametrized StepSize functor used for the step calculation.
	optimizer.setStepSize(gdc::ConstantStepSize<double>(0.8));		optimizer.setStepSize(StepSize(0.8));

	// Set the momentum rate used for the step calculation (default is 0.0).		// Set the momentum rate used for the step calculation (default is 0.0).
	// Defines how much momentum is kept from previous iterations.		// Defines how much momentum is kept from previous iterations.

include/gdcpp.h

+31 −24

Original line number	Original line	Diff line number	Diff line
	@@ -17,8 +17,6 @@

	namespace gdc		namespace gdc
	{		{
	using Index = long int;

	/** Functor to compute forward differences.		/** Functor to compute forward differences.
	* Computes the gradient of the objective f(x) as follows:		* Computes the gradient of the objective f(x) as follows:
	*		*
	@@ -26,11 +24,12 @@ namespace gdc
	*		*
	* The computation requires len(x) evaluations of the objective.		* The computation requires len(x) evaluations of the objective.
	*/		*/
	template<typename Scalar>		template<typename Vector>
	class ForwardDifferences		class ForwardDifferences
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &)>;		using Objective = std::function<Scalar(const Vector &)>;
	private:		private:
	Scalar eps_;		Scalar eps_;
	@@ -87,11 +86,12 @@ namespace gdc
	*		*
	* The computation requires len(x) evaluations of the objective.		* The computation requires len(x) evaluations of the objective.
	*/		*/
	template<typename Scalar>		template<typename Vector>
	class BackwardDifferences		class BackwardDifferences
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &)>;		using Objective = std::function<Scalar(const Vector &)>;
	private:		private:
	Scalar eps_;		Scalar eps_;
	@@ -147,11 +147,12 @@ namespace gdc
	*		*
	* The computation requires 2 * len(x) evaluations of the objective.		* The computation requires 2 * len(x) evaluations of the objective.
	*/		*/
	template<typename Scalar>		template<typename Vector>
	struct CentralDifferences		struct CentralDifferences
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &)>;		using Objective = std::function<Scalar(const Vector &)>;
	private:		private:
	Scalar eps_;		Scalar eps_;
	@@ -209,11 +210,12 @@ namespace gdc
	};		};

	/** Step size functor, which returns a constant step size. */		/** Step size functor, which returns a constant step size. */
	template<typename Scalar>		template<typename Vector>
	class ConstantStepSize		class ConstantStepSize
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &, Vector &)>;		using Objective = std::function<Scalar(const Vector &, Vector &)>;
	using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;		using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;
	private:		private:
	@@ -259,11 +261,12 @@ namespace gdc
	* Inverse: stepSize = (y_k^T * s_k) / (y_k^T * y_k)		* Inverse: stepSize = (y_k^T * s_k) / (y_k^T * y_k)
	*		*
	* The very first step is computed as a constant. */		* The very first step is computed as a constant. */
	template<typename Scalar>		template<typename Vector>
	class BarzilaiBorwein		class BarzilaiBorwein
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &, Vector &)>;		using Objective = std::function<Scalar(const Vector &, Vector &)>;
	using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;		using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;

	@@ -374,11 +377,12 @@ namespace gdc
	* If either condition does not hold the step size is decreased:		* If either condition does not hold the step size is decreased:
	*		*
	* stepSize = decrease * stepSize */		* stepSize = decrease * stepSize */
	template<typename Scalar>		template<typename Vector>
	class ArmijoBacktracking		class ArmijoBacktracking
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &, Vector &)>;		using Objective = std::function<Scalar(const Vector &, Vector &)>;
	using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;		using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;
	protected:		protected:
	@@ -520,11 +524,12 @@ namespace gdc
	* If either condition does not hold the step size is decreased:		* If either condition does not hold the step size is decreased:
	*		*
	* stepSize = decrease * stepSize */		* stepSize = decrease * stepSize */
	template<typename Scalar>		template<typename Vector>
	class WolfeBacktracking : public ArmijoBacktracking<Scalar>		class WolfeBacktracking : public ArmijoBacktracking<Vector>
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &, Vector &)>;		using Objective = std::function<Scalar(const Vector &, Vector &)>;
	using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;		using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;
	protected:		protected:
	@@ -549,7 +554,7 @@ namespace gdc
	const Scalar minStep,		const Scalar minStep,
	const Scalar maxStep,		const Scalar maxStep,
	const Index iterations)		const Index iterations)
	: ArmijoBacktracking<Scalar>(decrease, cArmijo, minStep, maxStep,		: ArmijoBacktracking<Vector>(decrease, cArmijo, minStep, maxStep,
	iterations),cWolfe_(cWolfe)		iterations),cWolfe_(cWolfe)
	{		{
	assert(cWolfe < 1);		assert(cWolfe < 1);
	@@ -581,11 +586,12 @@ namespace gdc
	*		*
	* This functor does not require computing any gradients and does not use		* This functor does not require computing any gradients and does not use
	* finite differences. */		* finite differences. */
	template<typename Scalar>		template<typename Vector>
	class DecreaseBacktracking		class DecreaseBacktracking
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Objective = std::function<Scalar(const Vector &, Vector &)>;		using Objective = std::function<Scalar(const Vector &, Vector &)>;
	using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;		using FiniteDifferences = std::function<void(const Vector &, const Scalar, Vector &)>;
	private:		private:
	@@ -673,14 +679,15 @@ namespace gdc
	}		}
	};		};

	template<typename Scalar,		template<typename Vector,
	typename Objective,		typename Objective,
	typename StepSize=BarzilaiBorwein<Scalar>,		typename StepSize=BarzilaiBorwein<Vector>,
	typename FiniteDifferences=CentralDifferences<Scalar>>		typename FiniteDifferences=CentralDifferences<Vector>>
	class GradientDescent		class GradientDescent
	{		{
	public:		public:
	using Vector = TNL::Containers::Vector<Scalar, TNL::Devices::Host, Index>;		using Scalar = typename Vector::RealType;
			using Index = typename Vector::IndexType;
	using Callback = std::function<bool(Index iterations, const Vector & xval, Scalar fval, const Vector & gradient)>;		using Callback = std::function<bool(Index iterations, const Vector & xval, Scalar fval, const Vector & gradient)>;

	struct Result		struct Result

test/gdcpp.cpp

+36 −36

Original line number	Original line	Diff line number	Diff line
	@@ -3,20 +3,21 @@

	using namespace gdc;		using namespace gdc;

	template<typename Scalar>		using Scalar = float;
			using Device = TNL::Devices::Host;
			using Index = int;
			using Vector = TNL::Containers::Vector<Scalar, Device, Index>;

	struct Paraboloid		struct Paraboloid
	{		{
	using Vector = TNL::Containers::Vector<float, TNL::Devices::Host, gdc::Index>;
	Scalar operator()(const Vector &state, Vector &)		Scalar operator()(const Vector &state, Vector &)
	{		{
	return state(0) * state(0) + state(1) * state(1);		return state(0) * state(0) + state(1) * state(1);
	}		}
	};		};

	template<typename Scalar>
	struct Rosenbrock		struct Rosenbrock
	{		{
	using Vector = TNL::Containers::Vector<float, TNL::Devices::Host, gdc::Index>;
	Scalar operator()(const Vector &state, Vector &)		Scalar operator()(const Vector &state, Vector &)
	{		{
	Scalar delta1 = 1 - state(0);		Scalar delta1 = 1 - state(0);
	@@ -26,20 +27,18 @@ struct Rosenbrock
	}		}
	};		};

	using Vector = TNL::Containers::Vector<float, TNL::Devices::Host, gdc::Index>;

	TEST_CASE("gradient_descent")		TEST_CASE("gradient_descent")
	{		{
	const float eps = 1e-3;		const Scalar eps = 1e-3;

	SECTION("optimize paraboloid")		SECTION("optimize paraboloid")
	{		{
	SECTION("forward differences")		SECTION("forward differences")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	ConstantStepSize<float>,		ConstantStepSize<Vector>,
	ForwardDifferences<float>> optimizer;		ForwardDifferences<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -51,10 +50,10 @@ TEST_CASE("gradient_descent")

	SECTION("backward differences")		SECTION("backward differences")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	ConstantStepSize<float>,		ConstantStepSize<Vector>,
	BackwardDifferences<float>> optimizer;		BackwardDifferences<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -66,10 +65,10 @@ TEST_CASE("gradient_descent")

	SECTION("central differences")		SECTION("central differences")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	ConstantStepSize<float>,		ConstantStepSize<Vector>,
	CentralDifferences<float>> optimizer;		CentralDifferences<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -81,9 +80,9 @@ TEST_CASE("gradient_descent")

	SECTION("constant step size")		SECTION("constant step size")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	ConstantStepSize<float>> optimizer;		ConstantStepSize<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -95,9 +94,9 @@ TEST_CASE("gradient_descent")

	SECTION("Barzilai-Borwein step")		SECTION("Barzilai-Borwein step")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	BarzilaiBorwein<float>> optimizer;		BarzilaiBorwein<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -109,9 +108,9 @@ TEST_CASE("gradient_descent")

	SECTION("Wolfe linesearch")		SECTION("Wolfe linesearch")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	WolfeBacktracking<float>> optimizer;		WolfeBacktracking<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -123,9 +122,9 @@ TEST_CASE("gradient_descent")

	SECTION("Armijo linesearch")		SECTION("Armijo linesearch")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	ArmijoBacktracking<float>> optimizer;		ArmijoBacktracking<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -137,9 +136,9 @@ TEST_CASE("gradient_descent")

	SECTION("Decrease linesearch")		SECTION("Decrease linesearch")
	{		{
	GradientDescent<float,		GradientDescent<Vector,
	Paraboloid<float>,		Paraboloid,
	DecreaseBacktracking<float>> optimizer;		DecreaseBacktracking<Vector>> optimizer;
	optimizer.setMaxIterations(100);		optimizer.setMaxIterations(100);

	Vector xval = {2, 2};		Vector xval = {2, 2};
	@@ -152,8 +151,9 @@ TEST_CASE("gradient_descent")

	SECTION("optimize Rosenbrock")		SECTION("optimize Rosenbrock")
	{		{
	GradientDescent<float, Rosenbrock<float>,		GradientDescent<Vector,
	WolfeBacktracking<float>> optimizer;		Rosenbrock,
			WolfeBacktracking<Vector>> optimizer;
	optimizer.setMaxIterations(3000);		optimizer.setMaxIterations(3000);
	optimizer.setMomentum(0.9);		optimizer.setMomentum(0.9);
	Vector xval = {-0.5, 0.5};		Vector xval = {-0.5, 0.5};