/* gdcpp.h
 *
 * Author: Fabian Meyer
 * Created On: 12 Jul 2019
 * License: MIT
 */

#ifndef GDCPP_GDCPP_H_
#define GDCPP_GDCPP_H_

#include <Eigen/Core>
#include <cassert>
#include <cmath>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>

namespace gdc
{
    typedef long int Index;

    /** Functor to compute forward differences.
     * Computes the gradient of the objective f(x) as follows:
     *
     *     grad(x) = (f(x + eps) - f(x)) / eps
     *
     * The computation requires len(x) evaluations of the objective.
     */
    template<typename Scalar>
    class ForwardDifferences
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &)> Objective;

    private:
        Scalar eps_;
        Index threads_;
        Objective objective_;

    public:
        ForwardDifferences()
            : ForwardDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon()))
        {}

        ForwardDifferences(const Scalar eps)
            : eps_(eps), threads_(1), objective_()
        {}

        void setNumericalEpsilon(const Scalar eps)
        { eps_ = eps; }

        void setThreads(const Index threads)
        { threads_ = threads; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void operator()(const Vector &xval, const Scalar fval, Vector &gradient)
        {
            assert(objective_);

            gradient.resize(xval.size());
            #pragma omp parallel for num_threads(threads_)
            for(Index i = 0; i < xval.size(); ++i)
            {
                Vector xvalN = xval;
                xvalN(i) += eps_;
                Scalar fvalN = objective_(xvalN);
                gradient(i) = (fvalN - fval) / eps_;
            }
        }
    };

    /** Functor to compute backward differences.
     * Computes the gradient of the objective f(x) as follows:
     *
     *     grad(x) = (f(x) - f(x - eps)) / eps
     *
     * The computation requires len(x) evaluations of the objective.
     */
    template<typename Scalar>
    class BackwardDifferences
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &)> Objective;

    private:
        Scalar eps_;
        Index threads_;
        Objective objective_;

    public:
        BackwardDifferences()
            : BackwardDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon()))
        {}

        BackwardDifferences(const Scalar eps)
            : eps_(eps), threads_(1), objective_()
        {}

        void setNumericalEpsilon(const Scalar eps)
        { eps_ = eps; }

        void setThreads(const Index threads)
        { threads_ = threads; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void operator()(const Vector &xval, const Scalar fval, Vector &gradient)
        {
            assert(objective_);

            gradient.resize(xval.size());
            #pragma omp parallel for num_threads(threads_)
            for(Index i = 0; i < xval.size(); ++i)
            {
                Vector xvalN = xval;
                xvalN(i) -= eps_;
                Scalar fvalN = objective_(xvalN);
                gradient(i) = (fval - fvalN) / eps_;
            }
        }
    };
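    /* Usage sketch (illustrative, not part of the original header): estimating
     * the gradient of f(x) = x^T x at x = (1, 2) with ForwardDifferences. The
     * analytic gradient is 2x = (2, 4).
     *
     *     gdc::ForwardDifferences<double> diff;
     *     diff.setObjective([](const Eigen::VectorXd &x) { return x.squaredNorm(); });
     *     Eigen::VectorXd x(2);
     *     x << 1, 2;
     *     Eigen::VectorXd grad;
     *     diff(x, x.squaredNorm(), grad); // grad is approximately (2, 4)
     */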
    /** Functor to compute central differences.
     * Computes the gradient of the objective f(x) as follows:
     *
     *     grad(x) = (f(x + 0.5 eps) - f(x - 0.5 eps)) / eps
     *
     * The computation requires 2 * len(x) evaluations of the objective.
     */
    template<typename Scalar>
    struct CentralDifferences
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &)> Objective;

    private:
        Scalar eps_;
        Index threads_;
        Objective objective_;

    public:
        CentralDifferences()
            : CentralDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon()))
        {}

        CentralDifferences(const Scalar eps)
            : eps_(eps), threads_(1), objective_()
        {}

        void setNumericalEpsilon(const Scalar eps)
        { eps_ = eps; }

        void setThreads(const Index threads)
        { threads_ = threads; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void operator()(const Vector &xval, const Scalar, Vector &gradient)
        {
            assert(objective_);

            Vector fvals(xval.size() * 2);
            #pragma omp parallel for num_threads(threads_)
            for(Index i = 0; i < fvals.size(); ++i)
            {
                Index idx = i / 2;
                Vector xvalN = xval;
                if(i % 2 == 0)
                    xvalN(idx) += eps_ / 2;
                else
                    xvalN(idx) -= eps_ / 2;

                fvals(i) = objective_(xvalN);
            }

            gradient.resize(xval.size());
            for(Index i = 0; i < xval.size(); ++i)
                gradient(i) = (fvals(i * 2) - fvals(i * 2 + 1)) / eps_;
        }
    };

    /** Dummy callback functor, which does nothing. */
    template<typename Scalar>
    struct NoCallback
    {
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;

        bool operator()(const Index, const Vector &, const Scalar, const Vector &) const
        {
            return true;
        }
    };

    /** Step size functor, which returns a constant step size. */
    template<typename Scalar>
    class ConstantStepSize
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &, Vector &)> Objective;
        typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;

    private:
        Scalar stepSize_;

    public:
        ConstantStepSize()
            : ConstantStepSize(static_cast<Scalar>(1e-15))
        {}

        ConstantStepSize(const Scalar stepSize)
            : stepSize_(stepSize)
        {}

        /** Set the step size returned by this functor.
         * @param stepSize step size returned by functor */
        void setStepSize(const Scalar stepSize)
        { stepSize_ = stepSize; }

        void setObjective(const Objective &)
        {}

        void setFiniteDifferences(const FiniteDifferences &)
        {}

        Scalar operator()(const Vector &, const Scalar, const Vector &)
        {
            return stepSize_;
        }
    };
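    /* Usage sketch (illustrative): a custom callback with the same signature as
     * NoCallback. GradientDescent::minimize() below stops iterating as soon as
     * the callback returns false, so this hypothetical functor aborts once the
     * objective value drops below a threshold.
     *
     *     struct StopBelowThreshold
     *     {
     *         typedef Eigen::Matrix<double, Eigen::Dynamic, 1> Vector;
     *
     *         bool operator()(const gdc::Index, const Vector &, const double fval,
     *             const Vector &) const
     *         {
     *             return fval >= 1e-8; // keep going while fval is still large
     *         }
     *     };
     */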
    /** Step size functor to compute Barzilai-Borwein (BB) steps.
     * The functor can either compute the direct or inverse BB step.
     * The steps are computed as follows:
     *
     *     s_k = x_k - x_k-1         k >= 1
     *     y_k = grad_k - grad_k-1   k >= 1
     *     Direct:  stepSize = (s_k^T * s_k) / (y_k^T * s_k)
     *     Inverse: stepSize = (y_k^T * s_k) / (y_k^T * y_k)
     *
     * The very first step is computed as a constant.
     */
    template<typename Scalar>
    class BarzilaiBorwein
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &, Vector &)> Objective;
        typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;

        enum class Method
        {
            Direct,
            Inverse
        };

    private:
        Vector lastXval_;
        Vector lastGradient_;
        Method method_;
        Scalar constStep_;

        Scalar constantStep() const
        {
            return constStep_;
        }

        Scalar directStep(const Vector &xval, const Vector &gradient)
        {
            auto sk = xval - lastXval_;
            auto yk = gradient - lastGradient_;
            Scalar num = sk.dot(sk);
            Scalar denom = sk.dot(yk);

            if(denom == 0)
                return 1;
            else
                return std::abs(num / denom);
        }

        Scalar inverseStep(const Vector &xval, const Vector &gradient)
        {
            auto sk = xval - lastXval_;
            auto yk = gradient - lastGradient_;
            Scalar num = sk.dot(yk);
            Scalar denom = yk.dot(yk);

            if(denom == 0)
                return 1;
            else
                return std::abs(num / denom);
        }

    public:
        BarzilaiBorwein()
            : BarzilaiBorwein(Method::Inverse, 1)
        {}

        BarzilaiBorwein(const Method method, const Scalar constStep)
            : lastXval_(), lastGradient_(), method_(method), constStep_(constStep)
        {}

        void setObjective(const Objective &)
        {}

        void setFiniteDifferences(const FiniteDifferences &)
        {}

        void setMethod(const Method method)
        { method_ = method; }

        void setConstStepSize(const Scalar stepSize)
        { constStep_ = stepSize; }

        Scalar operator()(const Vector &xval, const Scalar, const Vector &gradient)
        {
            Scalar stepSize = 0;
            if(lastXval_.size() == 0)
            {
                stepSize = constStep_;
            }
            else
            {
                switch(method_)
                {
                case Method::Direct:
                    stepSize = directStep(xval, gradient);
                    break;
                case Method::Inverse:
                    stepSize = inverseStep(xval, gradient);
                    break;
                default:
                    assert(false);
                    break;
                }
            }

            lastGradient_ = gradient;
            lastXval_ = xval;

            return stepSize;
        }
    };
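    /* Usage sketch (illustrative): the functor caches the previous iterate and
     * gradient, so the first call yields the constant step and later calls
     * yield BB steps. For f(x) = x^2 with gradient 2x, moving from x = 1 to
     * x = 0.8 gives s = -0.2 and y = -0.4, so the direct step is
     * (s * s) / (s * y) = 0.04 / 0.08 = 0.5, exactly the inverse curvature of
     * this quadratic.
     *
     *     gdc::BarzilaiBorwein<double> bb(gdc::BarzilaiBorwein<double>::Method::Direct, 0.1);
     *     Eigen::VectorXd x(1), g(1);
     *     x << 1.0; g << 2.0;
     *     double s0 = bb(x, 1.0, g);  // first call: constant step 0.1
     *     x << 0.8; g << 1.6;
     *     double s1 = bb(x, 0.64, g); // direct BB step: 0.5
     */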
    /** Step size functor to perform Armijo Linesearch with backtracking.
     * The functor iteratively decreases the step size until the following
     * condition is met:
     *
     *     Armijo: f(x - stepSize * grad(x)) <= f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
     *
     * If the condition does not hold the step size is decreased:
     *
     *     stepSize = decrease * stepSize
     */
    template<typename Scalar>
    class ArmijoBacktracking
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &, Vector &)> Objective;
        typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;

    protected:
        Scalar decrease_;
        Scalar cArmijo_;
        Scalar minStep_;
        Scalar maxStep_;
        Index maxIt_;
        Objective objective_;
        FiniteDifferences finiteDifferences_;

        Scalar evaluateObjective(const Vector &xval, Vector &gradient)
        {
            gradient.resize(0);
            Scalar fval = objective_(xval, gradient);

            // fall back to finite differences if the objective left the gradient empty
            if(gradient.size() == 0)
                finiteDifferences_(xval, fval, gradient);

            return fval;
        }

        virtual bool computeSecondCondition(const Scalar, const Scalar, const Scalar,
            const Vector &, const Vector &)
        {
            return true;
        }

    public:
        ArmijoBacktracking()
            : ArmijoBacktracking(0.8, 1e-4, 1e-20, 1, 0)
        {}

        ArmijoBacktracking(const Scalar decrease, const Scalar cArmijo,
            const Scalar minStep, const Scalar maxStep, const Index iterations)
            : decrease_(decrease), cArmijo_(cArmijo), minStep_(minStep),
            maxStep_(maxStep), maxIt_(iterations), objective_()
        {
            assert(decrease > 0);
            assert(decrease < 1);
            assert(cArmijo > 0);
            assert(cArmijo < 0.5);
            assert(minStep < maxStep);
        }

        /** Set the decreasing factor for backtracking.
         * Assure that decrease in (0, 1).
         * @param decrease decreasing factor */
        void setBacktrackingDecrease(const Scalar decrease)
        {
            assert(decrease > 0);
            assert(decrease < 1);
            decrease_ = decrease;
        }

        /** Set the relaxation constant for the Armijo condition (see class
         * description).
         * Assure cArmijo in (0, 0.5).
         * @param cArmijo armijo constant */
        void setArmijoConstant(const Scalar cArmijo)
        {
            assert(cArmijo > 0);
            assert(cArmijo < 0.5);
            cArmijo_ = cArmijo;
        }

        /** Set the bounds for the step size during linesearch.
         * The final step size is guaranteed to be in [minStep, maxStep].
         * @param minStep minimum step size
         * @param maxStep maximum step size */
        void setStepBounds(const Scalar minStep, const Scalar maxStep)
        {
            assert(minStep < maxStep);
            minStep_ = minStep;
            maxStep_ = maxStep;
        }

        /** Set the maximum number of iterations.
         * Set to 0 or negative for infinite iterations.
         * @param iterations maximum number of iterations */
        void setMaxIterations(const Index iterations)
        { maxIt_ = iterations; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void setFiniteDifferences(const FiniteDifferences &finiteDifferences)
        { finiteDifferences_ = finiteDifferences; }

        Scalar operator()(const Vector &xval, const Scalar fval, const Vector &gradient)
        {
            assert(objective_);
            assert(finiteDifferences_);

            Scalar stepSize = maxStep_ / decrease_;
            Vector gradientN;
            Vector xvalN;
            Scalar fvalN;
            bool armijoCondition = false;
            bool secondCondition = false;

            Index iterations = 0;
            while((maxIt_ <= 0 || iterations < maxIt_) &&
                stepSize * decrease_ >= minStep_ &&
                !(armijoCondition && secondCondition))
            {
                stepSize = decrease_ * stepSize;
                xvalN = xval - stepSize * gradient;
                fvalN = evaluateObjective(xvalN, gradientN);

                armijoCondition = fvalN <= fval - cArmijo_ * stepSize * gradient.dot(gradient);
                secondCondition = computeSecondCondition(stepSize, fval, fvalN, gradient, gradientN);

                ++iterations;
            }

            return stepSize;
        }
    };

    /** Step size functor to perform Wolfe Linesearch with backtracking.
     * The functor iteratively decreases the step size until the following
     * conditions are met:
     *
     *     Armijo: f(x - stepSize * grad(x)) <= f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
     *     Wolfe:  grad(x)^T * grad(x - stepSize * grad(x)) <= cWolfe * grad(x)^T * grad(x)
     *
     * If either condition does not hold the step size is decreased:
     *
     *     stepSize = decrease * stepSize
     */
    template<typename Scalar>
    class WolfeBacktracking : public ArmijoBacktracking<Scalar>
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &, Vector &)> Objective;
        typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;

    protected:
        Scalar cWolfe_;

        virtual bool computeSecondCondition(const Scalar, const Scalar, const Scalar,
            const Vector &gradient, const Vector &gradientN)
        {
            return gradient.dot(gradientN) <= cWolfe_ * gradient.dot(gradient);
        }

    public:
        WolfeBacktracking()
            : WolfeBacktracking(0.8, 1e-4, 0.9, 1e-20, 1, 0)
        {}

        WolfeBacktracking(const Scalar decrease, const Scalar cArmijo,
            const Scalar cWolfe, const Scalar minStep, const Scalar maxStep,
            const Index iterations)
            : ArmijoBacktracking<Scalar>(decrease, cArmijo, minStep, maxStep, iterations),
            cWolfe_(cWolfe)
        {
            assert(cWolfe < 1);
            assert(cArmijo < cWolfe);
        }

        /** Set the wolfe constant for the Wolfe condition (see class
         * description).
         * Assure that cArmijo < cWolfe < 1.
         * @param cWolfe wolfe constant */
        void setWolfeConstant(const Scalar cWolfe)
        {
            assert(cWolfe < 1);
            cWolfe_ = cWolfe;
        }
    };
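    /* Usage sketch (illustrative): the backtracking functors can be used on
     * their own by wiring in an objective and a finite-differences fallback.
     * The objective returns f(x) and may fill the gradient; if it leaves the
     * gradient empty, the fallback is invoked instead (a no-op here, since the
     * gradient is computed analytically).
     *
     *     typedef gdc::WolfeBacktracking<double> Search;
     *     Search search;
     *     search.setObjective([](const Search::Vector &x, Search::Vector &grad)
     *     {
     *         grad = 2 * x;
     *         return x.squaredNorm();
     *     });
     *     search.setFiniteDifferences(
     *         [](const Search::Vector &, const double, Search::Vector &) {});
     *     Eigen::VectorXd x(2), g(2);
     *     x << 1, 2;
     *     g << 2, 4;
     *     double step = search(x, x.squaredNorm(), g);
     */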
    /** Step size functor which searches for a step that reduces the function
     * value.
     * The functor iteratively decreases the step size until the following
     * condition is met:
     *
     *     f(x - stepSize * grad) < f(x)
     *
     * If this condition does not hold the step size is decreased:
     *
     *     stepSize = decrease * stepSize
     *
     * This functor does not require to compute any gradients and does not use
     * finite differences.
     */
    template<typename Scalar>
    class DecreaseBacktracking
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef std::function<Scalar(const Vector &, Vector &)> Objective;
        typedef std::function<void(const Vector &, const Scalar, Vector &)> FiniteDifferences;

    private:
        Scalar decrease_;
        Scalar minStep_;
        Scalar maxStep_;
        Index maxIt_;
        Objective objective_;

    public:
        DecreaseBacktracking()
            : DecreaseBacktracking(0.8, 1e-12, 1, 0)
        {}

        DecreaseBacktracking(const Scalar decrease, const Scalar minStep,
            const Scalar maxStep, const Index iterations)
            : decrease_(decrease), minStep_(minStep), maxStep_(maxStep),
            maxIt_(iterations), objective_()
        {}

        /** Set the decreasing factor for backtracking.
         * Assure that decrease in (0, 1).
         * @param decrease decreasing factor */
        void setBacktrackingDecrease(const Scalar decrease)
        { decrease_ = decrease; }

        /** Set the bounds for the step size during linesearch.
         * The final step size is guaranteed to be in [minStep, maxStep].
         * @param minStep minimum step size
         * @param maxStep maximum step size */
        void setStepBounds(const Scalar minStep, const Scalar maxStep)
        {
            assert(minStep < maxStep);
            minStep_ = minStep;
            maxStep_ = maxStep;
        }

        /** Set the maximum number of iterations.
         * Set to 0 or negative for infinite iterations.
         * @param iterations maximum number of iterations */
        void setMaxIterations(const Index iterations)
        { maxIt_ = iterations; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void setFiniteDifferences(const FiniteDifferences &)
        {}

        Scalar operator()(const Vector &xval, const Scalar fval, const Vector &gradient)
        {
            assert(objective_);

            Scalar stepSize = maxStep_ / decrease_;
            Vector xvalN;
            Vector gradientN;
            Scalar fvalN;
            bool improvement = false;

            Index iterations = 0;
            while((maxIt_ <= 0 || iterations < maxIt_) &&
                stepSize * decrease_ >= minStep_ &&
                !improvement)
            {
                stepSize = decrease_ * stepSize;
                xvalN = xval - stepSize * gradient;
                fvalN = objective_(xvalN, gradientN);

                improvement = fvalN < fval;

                ++iterations;
            }

            return stepSize;
        }
    };
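    /** Gradient descent optimizer with momentum.
     * The step size strategy (StepSize), the termination callback (Callback)
     * and the numerical differentiation scheme (FiniteDifferences) are
     * exchangeable template parameters. Each iteration updates the step as
     *
     *     step = momentum * step + (1 - momentum) * stepSize * grad(x)
     *     x = x - step
     *
     * The optimizer terminates when the gradient norm falls below the minimum
     * gradient length, the step length falls below the minimum step length,
     * the maximum number of iterations is reached or the callback returns
     * false.
     */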
    template<typename Scalar,
        typename Objective,
        typename StepSize = WolfeBacktracking<Scalar>,
        typename Callback = NoCallback<Scalar>,
        typename FiniteDifferences = CentralDifferences<Scalar>>
    class GradientDescent
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;

        struct Result
        {
            Index iterations;
            bool converged;
            Scalar fval;
            Vector xval;
        };

    protected:
        Index maxIt_;
        Scalar minGradientLen_;
        Scalar minStepLen_;
        Scalar momentum_;
        Index verbosity_;
        Objective objective_;
        StepSize stepSize_;
        Callback callback_;
        FiniteDifferences finiteDifferences_;

        Scalar evaluateObjective(const Vector &xval, Vector &gradient)
        {
            gradient.resize(0);
            Scalar fval = objective_(xval, gradient);

            // fall back to finite differences if the objective left the gradient empty
            if(gradient.size() == 0)
                finiteDifferences_(xval, fval, gradient);

            return fval;
        }

        std::string vector2str(const Vector &vec) const
        {
            std::stringstream ss1;
            ss1 << std::fixed << std::showpoint << std::setprecision(16);
            std::stringstream ss2;
            ss2 << '[';
            for(Index i = 0; i < vec.size(); ++i)
            {
                ss1 << vec(i);
                ss2 << std::setfill(' ') << std::setw(10) << ss1.str();
                if(i != vec.size() - 1)
                    ss2 << ' ';
                ss1.str("");
            }
            ss2 << ']';

            return ss2.str();
        }

    public:
        GradientDescent()
            : maxIt_(0), minGradientLen_(static_cast<Scalar>(1e-2)),
            minStepLen_(static_cast<Scalar>(1e-6)), momentum_(0),
            verbosity_(0), objective_(), stepSize_(), callback_(),
            finiteDifferences_()
        {}

        ~GradientDescent()
        {}

        void setThreads(const Index threads)
        { finiteDifferences_.setThreads(threads); }

        void setNumericalEpsilon(const Scalar eps)
        { finiteDifferences_.setNumericalEpsilon(eps); }

        void setMaxIterations(const Index iterations)
        { maxIt_ = iterations; }

        void setObjective(const Objective &objective)
        { objective_ = objective; }

        void setCallback(const Callback &callback)
        { callback_ = callback; }

        void setMinGradientLength(const Scalar gradientLen)
        { minGradientLen_ = gradientLen; }

        void setMinStepLength(const Scalar stepLen)
        { minStepLen_ = stepLen; }

        void setStepSize(const StepSize stepSize)
        { stepSize_ = stepSize; }

        void setMomentum(const Scalar momentum)
        { momentum_ = momentum; }

        void setVerbosity(const Index verbosity)
        { verbosity_ = verbosity; }

        Result minimize(const Vector &initialGuess)
        {
            finiteDifferences_.setObjective(
                [this](const Vector &xval)
                {
                    Vector tmp;
                    return this->objective_(xval, tmp);
                });
            stepSize_.setObjective(
                [this](const Vector &xval, Vector &gradient)
                {
                    return this->objective_(xval, gradient);
                });
            stepSize_.setFiniteDifferences(
                [this](const Vector &xval, const Scalar fval, Vector &gradient)
                {
                    this->finiteDifferences_(xval, fval, gradient);
                });

            Vector xval = initialGuess;
            Vector gradient;
            Scalar fval;
            Scalar gradientLen = minGradientLen_ + 1;
            Scalar stepSize;
            Vector step = Vector::Zero(xval.size());
            Scalar stepLen = minStepLen_ + 1;
            bool callbackResult = true;

            Index iterations = 0;
            while((maxIt_ <= 0 || iterations < maxIt_) &&
                gradientLen >= minGradientLen_ &&
                stepLen >= minStepLen_ &&
                callbackResult)
            {
                xval -= step;
                fval = evaluateObjective(xval, gradient);
                gradientLen = gradient.norm();

                // update step according to step size and momentum
                stepSize = stepSize_(xval, fval, gradient);
                step = momentum_ * step + (1 - momentum_) * stepSize * gradient;
                stepLen = step.norm();

                // evaluate callback and save its result
                callbackResult = callback_(iterations, xval, fval, gradient);

                if(verbosity_ > 0)
                {
                    std::stringstream ss;
                    ss << "it=" << std::setfill('0') << std::setw(4) << iterations
                        << std::fixed << std::showpoint << std::setprecision(20)
                        << " gradlen=" << gradientLen
                        << " stepsize=" << stepSize
                        << " steplen=" << stepLen;

                    if(verbosity_ > 2)
                        ss << " callback=" << (callbackResult ? "true" : "false");

                    ss << " fval=" << fval;

                    if(verbosity_ > 1)
                        ss << " xval=" << vector2str(xval);

                    if(verbosity_ > 2)
                        ss << " gradient=" << vector2str(gradient);

                    if(verbosity_ > 3)
                        ss << " step=" << vector2str(step);

                    std::cout << ss.str() << std::endl;
                }

                ++iterations;
            }

            Result result;
            result.xval = xval;
            result.fval = fval;
            result.iterations = iterations;
            result.converged = gradientLen < minGradientLen_ ||
                stepLen < minStepLen_;

            return result;
        }
    };
} // namespace gdc

#endif
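/* Usage sketch (illustrative, not shipped with the header): minimizing the
 * paraboloid f(x) = x^T x. The objective returns the function value and may
 * fill the gradient; leaving the gradient empty makes the optimizer fall back
 * to the FiniteDifferences functor (central differences by default).
 *
 *     #include "gdcpp.h"
 *
 *     struct Paraboloid
 *     {
 *         double operator()(const Eigen::VectorXd &x, Eigen::VectorXd &grad)
 *         {
 *             grad = 2 * x; // omit this line to use numerical differentiation
 *             return x.squaredNorm();
 *         }
 *     };
 *
 *     int main()
 *     {
 *         gdc::GradientDescent<double, Paraboloid> optimizer;
 *         optimizer.setMaxIterations(100);
 *         optimizer.setMinGradientLength(1e-6);
 *
 *         Eigen::VectorXd guess(2);
 *         guess << 2, 3;
 *         auto result = optimizer.minimize(guess);
 *         // result.converged is true, result.xval is close to (0, 0)
 *         return 0;
 *     }
 */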