/* gdcpp.h
 *
 * Author: Fabian Meyer
 * Created On: 12 Jul 2019
 * License: MIT
 */

#ifndef GDCPP_GDCPP_H_
#define GDCPP_GDCPP_H_

#include <Eigen/Geometry>
#include <cassert>
#include <cmath>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>

namespace gdc {

typedef long int Index;

/** Functor to compute forward differences.
 * Computes the gradient of the objective f(x) as follows:
 *
 *     grad(x) = (f(x + eps) - f(x)) / eps
 *
 * The computation requires len(x) evaluations of the objective.
 */
template <typename Scalar> class ForwardDifferences {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &)> Objective;

private:
  Scalar eps_;
  Index threads_;
  Objective objective_;

public:
  ForwardDifferences()
      : ForwardDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon())) {}

  ForwardDifferences(const Scalar eps) : eps_(eps), threads_(1), objective_() {}

  void setNumericalEpsilon(const Scalar eps) { eps_ = eps; }

  void setThreads(const Index threads) { threads_ = threads; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void operator()(const Vector &xval, const Scalar fval, Vector &gradient) {
    assert(objective_);

    gradient.resize(xval.size());
#pragma omp parallel for num_threads(threads_)
    for (Index i = 0; i < xval.size(); ++i) {
      Vector xvalN = xval;
      xvalN(i) += eps_;
      Scalar fvalN = objective_(xvalN);

      gradient(i) = (fvalN - fval) / eps_;
    }
  }
};
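
/* A minimal usage sketch (not part of the original file): approximating the
 * gradient of f(x) = x^T x, whose analytic gradient is 2x. The objective
 * lambda and the values below are illustrative only.
 *
 *   gdc::ForwardDifferences<double> diff;
 *   diff.setObjective(
 *       [](const Eigen::VectorXd &x) { return x.squaredNorm(); });
 *
 *   Eigen::VectorXd x(2);
 *   x << 1.0, 2.0;
 *   double fval = x.squaredNorm();
 *
 *   Eigen::VectorXd grad;
 *   diff(x, fval, grad); // grad is approximately (2, 4)
 */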

/** Functor to compute backward differences.
 * Computes the gradient of the objective f(x) as follows:
 *
 *     grad(x) = (f(x) - f(x - eps)) / eps
 *
 * The computation requires len(x) evaluations of the objective.
 */
template <typename Scalar> class BackwardDifferences {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &)> Objective;

private:
  Scalar eps_;
  Index threads_;
  Objective objective_;

public:
  BackwardDifferences()
      : BackwardDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon())) {
  }

  BackwardDifferences(const Scalar eps)
      : eps_(eps), threads_(1), objective_() {}

  void setNumericalEpsilon(const Scalar eps) { eps_ = eps; }

  void setThreads(const Index threads) { threads_ = threads; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void operator()(const Vector &xval, const Scalar fval, Vector &gradient) {
    assert(objective_);

    gradient.resize(xval.size());
#pragma omp parallel for num_threads(threads_)
    for (Index i = 0; i < xval.size(); ++i) {
      Vector xvalN = xval;
      xvalN(i) -= eps_;
      Scalar fvalN = objective_(xvalN);

      gradient(i) = (fval - fvalN) / eps_;
    }
  }
};

/** Functor to compute central differences.
 * Computes the gradient of the objective f(x) as follows:
 *
 *     grad(x) = (f(x + 0.5 * eps) - f(x - 0.5 * eps)) / eps
 *
 * The computation requires 2 * len(x) evaluations of the objective.
 */
template <typename Scalar> struct CentralDifferences {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &)> Objective;

private:
  Scalar eps_;
  Index threads_;
  Objective objective_;

public:
  CentralDifferences()
      : CentralDifferences(std::sqrt(std::numeric_limits<Scalar>::epsilon())) {}

  CentralDifferences(const Scalar eps) : eps_(eps), threads_(1), objective_() {}

  void setNumericalEpsilon(const Scalar eps) { eps_ = eps; }

  void setThreads(const Index threads) { threads_ = threads; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void operator()(const Vector &xval, const Scalar, Vector &gradient) {
    assert(objective_);

    Vector fvals(xval.size() * 2);
#pragma omp parallel for num_threads(threads_)
    for (Index i = 0; i < fvals.size(); ++i) {
      Index idx = i / 2;
      Vector xvalN = xval;
      if (i % 2 == 0)
        xvalN(idx) += eps_ / 2;
      else
        xvalN(idx) -= eps_ / 2;

      fvals(i) = objective_(xvalN);
    }

    gradient.resize(xval.size());
    for (Index i = 0; i < xval.size(); ++i)
      gradient(i) = (fvals(i * 2) - fvals(i * 2 + 1)) / eps_;
  }
};
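
/* Central differences are more accurate in eps than one-sided differences but
 * cost twice as many objective evaluations. A sketch (assumptions: MyObjective
 * is a placeholder functor; GradientDescent is defined further below) of
 * trading accuracy for evaluations via the optimizer's template parameter:
 *
 *   gdc::GradientDescent<double, MyObjective,
 *                        gdc::BarzilaiBorwein<double>,
 *                        gdc::NoCallback<double>,
 *                        gdc::ForwardDifferences<double>> optimizer;
 */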

/** Dummy callback functor, which does nothing. */
template <typename Scalar> struct NoCallback {
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;

  bool operator()(const Index, const Vector &, const Scalar,
                  const Vector &) const {
    return true;
  }
};
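
/* Callbacks can replace NoCallback to observe or stop the optimization; the
 * optimizer keeps iterating only while the callback returns true. A sketch of
 * a hypothetical early-stopping callback with the same signature:
 *
 *   struct StopBelowThreshold {
 *     typedef Eigen::Matrix<double, Eigen::Dynamic, 1> Vector;
 *
 *     bool operator()(const gdc::Index, const Vector &, const double fval,
 *                     const Vector &) const {
 *       return fval > 1e-3; // stop once the objective drops below 1e-3
 *     }
 *   };
 */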

/** Step size functor, which returns a constant step size. */
template <typename Scalar> class ConstantStepSize {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &, Vector &)> Objective;
  typedef std::function<void(const Vector &, const Scalar, Vector &)>
      FiniteDifferences;

private:
  Scalar stepSize_;

public:
  ConstantStepSize() : ConstantStepSize(static_cast<Scalar>(1e-15)) {}

  ConstantStepSize(const Scalar stepSize) : stepSize_(stepSize) {}

  /** Set the step size returned by this functor.
   * @param stepSize step size returned by functor */
  void setStepSize(const Scalar stepSize) { stepSize_ = stepSize; }

  void setObjective(const Objective &) {}

  void setFiniteDifferences(const FiniteDifferences &) {}

  Scalar operator()(const Vector &, const Scalar, const Vector &) {
    return stepSize_;
  }
};
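
/* A sketch of using a fixed learning rate instead of an adaptive one; the
 * value 0.01 is arbitrary:
 *
 *   gdc::ConstantStepSize<double> stepSize(0.01);
 *   stepSize.setStepSize(0.05); // may also be changed later
 */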

/** Step size functor to compute Barzilai-Borwein (BB) steps.
 * The functor can either compute the direct or inverse BB step.
 * The steps are computed as follows:
 *
 *     s_k = x_k - x_{k-1}              for k >= 1
 *     y_k = grad_k - grad_{k-1}        for k >= 1
 *
 *     Direct:  stepSize = (s_k^T * s_k) / (y_k^T * s_k)
 *     Inverse: stepSize = (y_k^T * s_k) / (y_k^T * y_k)
 *
 * The very first step is computed as a constant. */
template <typename Scalar> class BarzilaiBorwein {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &, Vector &)> Objective;
  typedef std::function<void(const Vector &, const Scalar, Vector &)>
      FiniteDifferences;

  enum class Method { Direct, Inverse };

private:
  Vector lastXval_;
  Vector lastGradient_;
  Method method_;
  Scalar constStep_;

  Scalar constantStep() const { return constStep_; }

  Scalar directStep(const Vector &xval, const Vector &gradient) {
    // Evaluate the differences into plain vectors; keeping Eigen expression
    // templates behind auto would re-evaluate them in every dot product.
    Vector sk = xval - lastXval_;
    Vector yk = gradient - lastGradient_;
    Scalar num = sk.dot(sk);
    Scalar denom = sk.dot(yk);

    if (denom == 0)
      return 1;
    else
      return std::abs(num / denom);
  }

  Scalar inverseStep(const Vector &xval, const Vector &gradient) {
    Vector sk = xval - lastXval_;
    Vector yk = gradient - lastGradient_;
    Scalar num = sk.dot(yk);
    Scalar denom = yk.dot(yk);

    if (denom == 0)
      return 1;
    else
      return std::abs(num / denom);
  }

public:
  BarzilaiBorwein() : BarzilaiBorwein(Method::Inverse, 1) {}

  BarzilaiBorwein(const Method method, const Scalar constStep)
      : lastXval_(), lastGradient_(), method_(method), constStep_(constStep) {}

  void setObjective(const Objective &) {}

  void setFiniteDifferences(const FiniteDifferences &) {}

  void setMethod(const Method method) { method_ = method; }

  void setConstStepSize(const Scalar stepSize) { constStep_ = stepSize; }

  Scalar operator()(const Vector &xval, const Scalar, const Vector &gradient) {
    Scalar stepSize = 0;
    if (lastXval_.size() == 0) {
      // No previous iterate yet; fall back to the constant first step.
      stepSize = constStep_;
    } else {
      switch (method_) {
      case Method::Direct:
        stepSize = directStep(xval, gradient);
        break;
      case Method::Inverse:
        stepSize = inverseStep(xval, gradient);
        break;
      default:
        assert(false);
        break;
      }
    }

    lastGradient_ = gradient;
    lastXval_ = xval;

    return stepSize;
  }
};
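
/* A sketch of selecting the direct BB step instead of the default inverse
 * variant; the constant 1e-2 for the very first step is illustrative:
 *
 *   gdc::BarzilaiBorwein<double> stepSize(
 *       gdc::BarzilaiBorwein<double>::Method::Direct, 1e-2);
 *
 * Equivalently via setters:
 *
 *   stepSize.setMethod(gdc::BarzilaiBorwein<double>::Method::Direct);
 *   stepSize.setConstStepSize(1e-2);
 */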

/** Step size functor to perform Armijo line search with backtracking.
 * The functor iteratively decreases the step size until the following
 * condition is met:
 *
 *     Armijo: f(x - stepSize * grad(x)) <=
 *             f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
 *
 * If the condition does not hold, the step size is decreased:
 *
 *     stepSize = decrease * stepSize */
template <typename Scalar> class ArmijoBacktracking {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &, Vector &)> Objective;
  typedef std::function<void(const Vector &, const Scalar, Vector &)>
      FiniteDifferences;

protected:
  Scalar decrease_;
  Scalar cArmijo_;
  Scalar minStep_;
  Scalar maxStep_;
  Index maxIt_;
  Objective objective_;
  FiniteDifferences finiteDifferences_;

  Scalar evaluateObjective(const Vector &xval, Vector &gradient) {
    gradient.resize(0);
    Scalar fval = objective_(xval, gradient);
    // If the objective did not provide a gradient, compute it numerically.
    if (gradient.size() == 0)
      finiteDifferences_(xval, fval, gradient);
    return fval;
  }

  virtual bool computeSecondCondition(const Scalar, const Scalar, const Scalar,
                                      const Vector &, const Vector &) {
    return true;
  }

public:
  ArmijoBacktracking() : ArmijoBacktracking(0.8, 1e-4, 1e-20, 1, 0) {}

  ArmijoBacktracking(const Scalar decrease, const Scalar cArmijo,
                     const Scalar minStep, const Scalar maxStep,
                     const Index iterations)
      : decrease_(decrease), cArmijo_(cArmijo), minStep_(minStep),
        maxStep_(maxStep), maxIt_(iterations), objective_() {
    assert(decrease > 0);
    assert(decrease < 1);
    assert(cArmijo > 0);
    assert(cArmijo < 0.5);
    assert(minStep < maxStep);
  }

  virtual ~ArmijoBacktracking() {}

  /** Set the decreasing factor for backtracking.
   * Assure that decrease is in (0, 1).
   * @param decrease decreasing factor */
  void setBacktrackingDecrease(const Scalar decrease) {
    assert(decrease > 0);
    assert(decrease < 1);
    decrease_ = decrease;
  }

  /** Set the relaxation constant for the Armijo condition (see class
   * description).
   * Assure that cArmijo is in (0, 0.5).
   * @param cArmijo armijo constant */
  void setArmijoConstant(const Scalar cArmijo) {
    assert(cArmijo > 0);
    assert(cArmijo < 0.5);
    cArmijo_ = cArmijo;
  }

  /** Set the bounds for the step size during line search.
   * The final step size is guaranteed to be in [minStep, maxStep].
   * @param minStep minimum step size
   * @param maxStep maximum step size */
  void setStepBounds(const Scalar minStep, const Scalar maxStep) {
    assert(minStep < maxStep);
    minStep_ = minStep;
    maxStep_ = maxStep;
  }

  /** Set the maximum number of iterations.
   * Set to 0 or negative for infinite iterations.
   * @param iterations maximum number of iterations */
  void setMaxIterations(const Index iterations) { maxIt_ = iterations; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void setFiniteDifferences(const FiniteDifferences &finiteDifferences) {
    finiteDifferences_ = finiteDifferences;
  }

  Scalar operator()(const Vector &xval, const Scalar fval,
                    const Vector &gradient) {
    assert(objective_);
    assert(finiteDifferences_);

    // Start above maxStep_ so the first decrease lands exactly on maxStep_.
    Scalar stepSize = maxStep_ / decrease_;
    Vector gradientN;
    Vector xvalN;
    Scalar fvalN;
    bool armijoCondition = false;
    bool secondCondition = false;

    Index iterations = 0;
    while ((maxIt_ <= 0 || iterations < maxIt_) &&
           stepSize * decrease_ >= minStep_ &&
           !(armijoCondition && secondCondition)) {
      stepSize = decrease_ * stepSize;
      xvalN = xval - stepSize * gradient;
      fvalN = evaluateObjective(xvalN, gradientN);

      armijoCondition =
          fvalN <= fval - cArmijo_ * stepSize * gradient.dot(gradient);
      secondCondition =
          computeSecondCondition(stepSize, fval, fvalN, gradient, gradientN);

      ++iterations;
    }

    return stepSize;
  }
};
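
/* A sketch of configuring the Armijo backtracking line search; the parameter
 * values echo the defaults above and are illustrative:
 *
 *   gdc::ArmijoBacktracking<double> lineSearch;
 *   lineSearch.setBacktrackingDecrease(0.8); // shrink step by 20% per trial
 *   lineSearch.setArmijoConstant(1e-4);      // required relative decrease
 *   lineSearch.setStepBounds(1e-20, 1.0);    // clamp the step size
 *   lineSearch.setMaxIterations(50);         // cap the backtracking loop
 */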

/** Step size functor to perform Wolfe line search with backtracking.
 * The functor iteratively decreases the step size until the following
 * conditions are met:
 *
 *     Armijo: f(x - stepSize * grad(x)) <=
 *             f(x) - cArmijo * stepSize * grad(x)^T * grad(x)
 *     Wolfe:  grad(x)^T * grad(x - stepSize * grad(x)) <=
 *             cWolfe * grad(x)^T * grad(x)
 *
 * If either condition does not hold, the step size is decreased:
 *
 *     stepSize = decrease * stepSize */
template <typename Scalar>
class WolfeBacktracking : public ArmijoBacktracking<Scalar> {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &, Vector &)> Objective;
  typedef std::function<void(const Vector &, const Scalar, Vector &)>
      FiniteDifferences;

protected:
  Scalar cWolfe_;

  virtual bool computeSecondCondition(const Scalar, const Scalar, const Scalar,
                                      const Vector &gradient,
                                      const Vector &gradientN) {
    return gradient.dot(gradientN) <= cWolfe_ * gradient.dot(gradient);
  }

public:
  WolfeBacktracking() : WolfeBacktracking(0.8, 1e-4, 0.9, 1e-20, 1, 0) {}

  WolfeBacktracking(const Scalar decrease, const Scalar cArmijo,
                    const Scalar cWolfe, const Scalar minStep,
                    const Scalar maxStep, const Index iterations)
      : ArmijoBacktracking<Scalar>(decrease, cArmijo, minStep, maxStep,
                                   iterations),
        cWolfe_(cWolfe) {
    assert(cWolfe < 1);
    assert(cArmijo < cWolfe);
  }

  /** Set the Wolfe constant for the curvature condition (see class
   * description).
   * Assure that cArmijo < cWolfe < 1.
   * @param cWolfe wolfe constant */
  void setWolfeConstant(const Scalar cWolfe) {
    assert(cWolfe < 1);
    cWolfe_ = cWolfe;
  }
};
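
/* A sketch of the Wolfe variant; cArmijo < cWolfe < 1 must hold, and 0.9 is a
 * common curvature constant for gradient-based methods. The remaining values
 * mirror the Armijo sketch above:
 *
 *   gdc::WolfeBacktracking<double> lineSearch(0.8, 1e-4, 0.9, 1e-20, 1.0, 50);
 */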

/** Step size functor which searches for a step that reduces the function
 * value.
 * The functor iteratively decreases the step size until the following
 * condition is met:
 *
 *     f(x - stepSize * grad) < f(x)
 *
 * If this condition does not hold, the step size is decreased:
 *
 *     stepSize = decrease * stepSize
 *
 * This functor does not require computing any gradients and does not use
 * finite differences. */
template <typename Scalar> class DecreaseBacktracking {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
  typedef std::function<Scalar(const Vector &, Vector &)> Objective;
  typedef std::function<void(const Vector &, const Scalar, Vector &)>
      FiniteDifferences;

private:
  Scalar decrease_;
  Scalar minStep_;
  Scalar maxStep_;
  Index maxIt_;
  Objective objective_;

public:
  DecreaseBacktracking() : DecreaseBacktracking(0.8, 1e-12, 1, 0) {}

  DecreaseBacktracking(const Scalar decrease, const Scalar minStep,
                       const Scalar maxStep, const Index iterations)
      : decrease_(decrease), minStep_(minStep), maxStep_(maxStep),
        maxIt_(iterations), objective_() {}

  /** Set the decreasing factor for backtracking.
   * Assure that decrease is in (0, 1).
   * @param decrease decreasing factor */
  void setBacktrackingDecrease(const Scalar decrease) { decrease_ = decrease; }

  /** Set the bounds for the step size during line search.
   * The final step size is guaranteed to be in [minStep, maxStep].
   * @param minStep minimum step size
   * @param maxStep maximum step size */
  void setStepBounds(const Scalar minStep, const Scalar maxStep) {
    assert(minStep < maxStep);
    minStep_ = minStep;
    maxStep_ = maxStep;
  }

  /** Set the maximum number of iterations.
   * Set to 0 or negative for infinite iterations.
   * @param iterations maximum number of iterations */
  void setMaxIterations(const Index iterations) { maxIt_ = iterations; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void setFiniteDifferences(const FiniteDifferences &) {}

  Scalar operator()(const Vector &xval, const Scalar fval,
                    const Vector &gradient) {
    assert(objective_);

    Scalar stepSize = maxStep_ / decrease_;
    Vector xvalN;
    Vector gradientN;
    Scalar fvalN;
    bool improvement = false;

    Index iterations = 0;
    while ((maxIt_ <= 0 || iterations < maxIt_) &&
           stepSize * decrease_ >= minStep_ && !improvement) {
      stepSize = decrease_ * stepSize;
      xvalN = xval - stepSize * gradient;
      fvalN = objective_(xvalN, gradientN);

      improvement = fvalN < fval;

      ++iterations;
    }

    return stepSize;
  }
};
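
/* A sketch of the simple decrease-only line search; it accepts the first
 * trial step that lowers the function value and never evaluates gradients
 * itself, which makes each trial cheap. All values are illustrative:
 *
 *   gdc::DecreaseBacktracking<double> lineSearch(0.8, 1e-12, 1.0, 50);
 */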

/** Gradient descent optimizer with configurable step size strategy, callback
 * and numerical gradient approximation. */
template <typename Scalar, typename Objective,
          typename StepSize = BarzilaiBorwein<Scalar>,
          typename Callback = NoCallback<Scalar>,
          typename FiniteDifferences = CentralDifferences<Scalar>>
class GradientDescent {
public:
  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;

  struct Result {
    Index iterations;
    bool converged;
    Scalar fval;
    Vector xval;
  };

protected:
  Index maxIt_;
  Scalar minGradientLen_;
  Scalar minStepLen_;
  Scalar momentum_;
  Index verbosity_;
  Objective objective_;
  StepSize stepSize_;
  Callback callback_;
  FiniteDifferences finiteDifferences_;

  Scalar evaluateObjective(const Vector &xval, Vector &gradient) {
    gradient.resize(0);
    Scalar fval = objective_(xval, gradient);
    // If the objective did not provide a gradient, compute it numerically.
    if (gradient.size() == 0)
      finiteDifferences_(xval, fval, gradient);
    return fval;
  }

  std::string vector2str(const Vector &vec) const {
    std::stringstream ss1;
    ss1 << std::fixed << std::showpoint << std::setprecision(16);
    std::stringstream ss2;
    ss2 << '[';
    for (Index i = 0; i < vec.size(); ++i) {
      ss1 << vec(i);
      ss2 << std::setfill(' ') << std::setw(10) << ss1.str();
      if (i != vec.size() - 1)
        ss2 << ' ';
      ss1.str("");
    }
    ss2 << ']';

    return ss2.str();
  }

public:
  GradientDescent()
      : maxIt_(0), minGradientLen_(static_cast<Scalar>(1e-2)),
        minStepLen_(static_cast<Scalar>(1e-6)), momentum_(0), verbosity_(0),
        objective_(), stepSize_(), callback_(), finiteDifferences_() {}

  ~GradientDescent() {}

  void setThreads(const Index threads) {
    finiteDifferences_.setThreads(threads);
  }

  void setNumericalEpsilon(const Scalar eps) {
    finiteDifferences_.setNumericalEpsilon(eps);
  }

  void setMaxIterations(const Index iterations) { maxIt_ = iterations; }

  void setObjective(const Objective &objective) { objective_ = objective; }

  void setCallback(const Callback &callback) { callback_ = callback; }

  void setMinGradientLength(const Scalar gradientLen) {
    minGradientLen_ = gradientLen;
  }

  void setMinStepLength(const Scalar stepLen) { minStepLen_ = stepLen; }

  void setStepSize(const StepSize &stepSize) { stepSize_ = stepSize; }

  void setMomentum(const Scalar momentum) { momentum_ = momentum; }

  void setVerbosity(const Index verbosity) { verbosity_ = verbosity; }

  Result minimize(const Vector &initialGuess) {
    // Wire the objective and the finite-differences fallback into the
    // gradient approximation and step size functors.
    finiteDifferences_.setObjective([this](const Vector &xval) {
      Vector tmp;
      return this->objective_(xval, tmp);
    });
    stepSize_.setObjective([this](const Vector &xval, Vector &gradient) {
      return this->objective_(xval, gradient);
    });
    stepSize_.setFiniteDifferences(
        [this](const Vector &xval, const Scalar fval, Vector &gradient) {
          this->finiteDifferences_(xval, fval, gradient);
        });

    Vector xval = initialGuess;
    Vector gradient;
    Scalar fval = 0;
    Scalar gradientLen = minGradientLen_ + 1;
    Scalar stepSize;
    Vector step = Vector::Zero(xval.size());
    Scalar stepLen = minStepLen_ + 1;
    bool callbackResult = true;

    Index iterations = 0;
    while ((maxIt_ <= 0 || iterations < maxIt_) &&
           gradientLen >= minGradientLen_ && stepLen >= minStepLen_ &&
           callbackResult) {
      xval -= step;
      fval = evaluateObjective(xval, gradient);
      gradientLen = gradient.norm();
      // update step according to step size and momentum
      stepSize = stepSize_(xval, fval, gradient);
      step = momentum_ * step + (1 - momentum_) * stepSize * gradient;
      stepLen = step.norm();
      // evaluate callback and save its result
      callbackResult = callback_(iterations, xval, fval, gradient);

      if (verbosity_ > 0) {
        std::stringstream ss;
        ss << "it=" << std::setfill('0') << std::setw(4) << iterations
           << std::fixed << std::showpoint << std::setprecision(20)
           << " gradlen=" << gradientLen << " stepsize=" << stepSize
           << " steplen=" << stepLen;

        if (verbosity_ > 2)
          ss << " callback=" << (callbackResult ? "true" : "false");

        ss << " fval=" << fval;

        if (verbosity_ > 1)
          ss << " xval=" << vector2str(xval);
        if (verbosity_ > 2)
          ss << " gradient=" << vector2str(gradient);
        if (verbosity_ > 3)
          ss << " step=" << vector2str(step);
        std::cout << ss.str() << std::endl;
      }

      ++iterations;
    }

    Result result;
    result.xval = xval;
    result.fval = fval;
    result.iterations = iterations;
    result.converged = gradientLen < minGradientLen_ || stepLen < minStepLen_;

    return result;
  }
};
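
/* A minimal end-to-end sketch (not part of the original file): minimizing the
 * paraboloid f(x) = x^T x starting from (2, 3). The objective may fill in the
 * analytic gradient; if it leaves the gradient empty, the optimizer falls
 * back to finite differences. "Paraboloid" and all values are illustrative.
 *
 *   struct Paraboloid {
 *     double operator()(const Eigen::VectorXd &x, Eigen::VectorXd &grad) {
 *       grad = 2 * x; // omit this line to use the finite-difference fallback
 *       return x.squaredNorm();
 *     }
 *   };
 *
 *   int main() {
 *     gdc::GradientDescent<double, Paraboloid> optimizer;
 *     optimizer.setMaxIterations(100);
 *     optimizer.setMinGradientLength(1e-6);
 *
 *     Eigen::VectorXd guess(2);
 *     guess << 2.0, 3.0;
 *
 *     auto result = optimizer.minimize(guess);
 *     // inspect result.converged, result.iterations, result.fval, result.xval
 *     return 0;
 *   }
 */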

} // namespace gdc

#endif