Merge pull request #1878 from borglab/cg-methods
commit 05af66296d
gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp

@@ -43,7 +43,7 @@ static VectorValues gradientInPlace(const NonlinearFactorGraph& nfg,
 
 NonlinearConjugateGradientOptimizer::NonlinearConjugateGradientOptimizer(
     const NonlinearFactorGraph& graph, const Values& initialValues,
-    const Parameters& params)
+    const Parameters& params, const DirectionMethod& directionMethod)
     : Base(graph, std::unique_ptr<State>(
                new State(initialValues, graph.error(initialValues)))),
       params_(params) {}
@@ -70,7 +70,8 @@ NonlinearConjugateGradientOptimizer::System::advance(const State& current,
 
 GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
   const auto [newValues, dummy] = nonlinearConjugateGradient<System, Values>(
-      System(graph_), state_->values, params_, true /* single iteration */);
+      System(graph_), state_->values, params_, true /* single iteration */,
+      directionMethod_);
   state_.reset(
       new State(newValues, graph_.error(newValues), state_->iterations + 1));
 
@@ -81,8 +82,8 @@ GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
 const Values& NonlinearConjugateGradientOptimizer::optimize() {
   // Optimize until convergence
   System system(graph_);
-  const auto [newValues, iterations] =
-      nonlinearConjugateGradient(system, state_->values, params_, false);
+  const auto [newValues, iterations] = nonlinearConjugateGradient(
+      system, state_->values, params_, false, directionMethod_);
   state_.reset(
       new State(std::move(newValues), graph_.error(newValues), iterations));
   return state_->values;
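The two hunks above thread the stored `directionMethod_` through both the single-step `iterate()` and the full `optimize()` loop. For orientation, here is a minimal caller-side sketch of the extended constructor; the toy graph, key, and initial values are illustrative placeholders, not part of the PR:

```cpp
#include <gtsam/geometry/Point2.h>
#include <gtsam/linear/NoiseModel.h>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <gtsam/nonlinear/NonlinearFactorGraph.h>
#include <gtsam/nonlinear/Values.h>

using namespace gtsam;

int main() {
  // Toy problem: a single prior factor pulling key 1 toward the origin.
  NonlinearFactorGraph graph;
  graph.addPrior(1, Point2(0.0, 0.0), noiseModel::Isotropic::Sigma(2, 1.0));

  Values initial;
  initial.insert(1, Point2(3.0, 4.0));

  // The new fourth argument selects the beta formula; it defaults to
  // DirectionMethod::PolakRibiere, matching the previously hard-coded rule.
  NonlinearConjugateGradientOptimizer optimizer(
      graph, initial, NonlinearOptimizerParams(),
      DirectionMethod::FletcherReeves);
  Values result = optimizer.optimize();
  result.print("result: ");
  return 0;
}
```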
gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h

@@ -23,6 +23,57 @@
 
 namespace gtsam {
 
+/// Fletcher-Reeves formula for computing β, the direction of steepest descent.
+template <typename Gradient>
+double FletcherReeves(const Gradient &currentGradient,
+                      const Gradient &prevGradient) {
+  // Fletcher-Reeves: beta = g_n'*g_n/g_n-1'*g_n-1
+  const double beta =
+      currentGradient.dot(currentGradient) / prevGradient.dot(prevGradient);
+  return beta;
+}
+
+/// Polak-Ribiere formula for computing β, the direction of steepest descent.
+template <typename Gradient>
+double PolakRibiere(const Gradient &currentGradient,
+                    const Gradient &prevGradient) {
+  // Polak-Ribiere: beta = g_n'*(g_n-g_n-1)/g_n-1'*g_n-1
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
+                        prevGradient.dot(prevGradient));
+  return beta;
+}
+
+/// The Hestenes-Stiefel formula for computing β,
+/// the direction of steepest descent.
+template <typename Gradient>
+double HestenesStiefel(const Gradient &currentGradient,
+                       const Gradient &prevGradient,
+                       const Gradient &direction) {
+  // Hestenes-Stiefel: beta = g_n'*(g_n-g_n-1)/(-s_n-1')*(g_n-g_n-1)
+  Gradient d = currentGradient - prevGradient;
+  const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
+  return beta;
+}
+
+/// The Dai-Yuan formula for computing β, the direction of steepest descent.
+template <typename Gradient>
+double DaiYuan(const Gradient &currentGradient, const Gradient &prevGradient,
+               const Gradient &direction) {
+  // Dai-Yuan: beta = g_n'*g_n/(-s_n-1')*(g_n-g_n-1)
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient) /
+                        -direction.dot(currentGradient - prevGradient));
+  return beta;
+}
+
+enum class DirectionMethod {
+  FletcherReeves,
+  PolakRibiere,
+  HestenesStiefel,
+  DaiYuan
+};
+
 /** An implementation of the nonlinear CG method using the template below */
 class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
     : public NonlinearOptimizer {
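In standard notation, with g_n the current gradient, g_{n-1} the previous gradient, and s_{n-1} the previous search direction, the ASCII comments in the helpers above correspond to the following update rules (the implementation clamps all but Fletcher-Reeves at zero):

```latex
\begin{aligned}
\beta^{FR}_n &= \frac{g_n^\top g_n}{g_{n-1}^\top g_{n-1}} \\
\beta^{PR}_n &= \max\!\left(0,\; \frac{g_n^\top (g_n - g_{n-1})}{g_{n-1}^\top g_{n-1}}\right) \\
\beta^{HS}_n &= \max\!\left(0,\; \frac{g_n^\top (g_n - g_{n-1})}{-s_{n-1}^\top (g_n - g_{n-1})}\right) \\
\beta^{DY}_n &= \max\!\left(0,\; \frac{g_n^\top g_n}{-s_{n-1}^\top (g_n - g_{n-1})}\right)
\end{aligned}
```

The inner loop further down then forms the next search direction as `direction = currentGradient + beta * direction`.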
@@ -51,14 +102,16 @@ class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
 
  protected:
   Parameters params_;
+  DirectionMethod directionMethod_ = DirectionMethod::PolakRibiere;
 
   const NonlinearOptimizerParams &_params() const override { return params_; }
 
  public:
   /// Constructor
-  NonlinearConjugateGradientOptimizer(const NonlinearFactorGraph &graph,
-                                      const Values &initialValues,
-                                      const Parameters &params = Parameters());
+  NonlinearConjugateGradientOptimizer(
+      const NonlinearFactorGraph &graph, const Values &initialValues,
+      const Parameters &params = Parameters(),
+      const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere);
 
   /// Destructor
   ~NonlinearConjugateGradientOptimizer() override {}
@@ -140,7 +193,9 @@ double lineSearch(const S &system, const V currentValues, const W &gradient) {
 template <class S, class V>
 std::tuple<V, int> nonlinearConjugateGradient(
     const S &system, const V &initial, const NonlinearOptimizerParams &params,
-    const bool singleIteration, const bool gradientDescent = false) {
+    const bool singleIteration,
+    const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere,
+    const bool gradientDescent = false) {
   // GTSAM_CONCEPT_MANIFOLD_TYPE(V)
 
   size_t iteration = 0;
@@ -177,10 +232,23 @@ std::tuple<V, int> nonlinearConjugateGradient(
     } else {
       prevGradient = currentGradient;
       currentGradient = system.gradient(currentValues);
-      // Polak-Ribiere: beta = g'*(g_n-g_n-1)/g_n-1'*g_n-1
-      const double beta =
-          std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
-                            prevGradient.dot(prevGradient));
+
+      double beta;
+      switch (directionMethod) {
+        case DirectionMethod::FletcherReeves:
+          beta = FletcherReeves(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::PolakRibiere:
+          beta = PolakRibiere(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::HestenesStiefel:
+          beta = HestenesStiefel(currentGradient, prevGradient, direction);
+          break;
+        case DirectionMethod::DaiYuan:
+          beta = DaiYuan(currentGradient, prevGradient, direction);
+          break;
+      }
+
       direction = currentGradient + (beta * direction);
     }
 
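Because the β helpers are templated on any `Gradient` type providing `dot()` and subtraction, they can be exercised standalone on Eigen vectors. A minimal editorial sketch, not part of the PR; it assumes the header above is on the include path, and the numbers are arbitrary:

```cpp
#include <Eigen/Dense>
#include <iostream>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>

int main() {
  const Eigen::Vector2d g(1.0, 2.0);   // current gradient g_n
  const Eigen::Vector2d gp(2.0, 0.0);  // previous gradient g_{n-1}
  const Eigen::Vector2d s = -gp;       // previous search direction s_{n-1}

  std::cout << gtsam::FletcherReeves(g, gp) << "\n";      // 5/4 = 1.25
  std::cout << gtsam::PolakRibiere(g, gp) << "\n";        // 3/4 = 0.75
  std::cout << gtsam::HestenesStiefel(g, gp, s) << "\n";  // negative, clamped to 0
  std::cout << gtsam::DaiYuan(g, gp, s) << "\n";          // negative, clamped to 0
  return 0;
}
```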
testNonlinearConjugateGradientOptimizer.cpp

@@ -79,6 +79,49 @@ TEST(NonlinearConjugateGradientOptimizer, Optimize) {
   EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
 }
 
+/* ************************************************************************* */
+/// Test different direction methods
+TEST(NonlinearConjugateGradientOptimizer, DirectionMethods) {
+  const auto [graph, initialEstimate] = generateProblem();
+
+  NonlinearOptimizerParams param;
+  param.maxIterations =
+      500; /* requires a larger number of iterations to converge */
+  param.verbosity = NonlinearOptimizerParams::SILENT;
+
+  // Fletcher-Reeves
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::FletcherReeves);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Polak-Ribiere
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::PolakRibiere);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Hestenes-Stiefel
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::HestenesStiefel);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Dai-Yuan
+  {
+    NonlinearConjugateGradientOptimizer optimizer(graph, initialEstimate, param,
+                                                  DirectionMethod::DaiYuan);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+}
+
 /* ************************************************************************* */
 int main() {
   TestResult tr;