improved efficiency of diagonal damping (avoiding multiple traversal of binary tree)
parent
afb5bac2f7
commit
a6d73e8884
|
@ -31,37 +31,61 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
|
||||
namespace gtsam {
|
||||
|
||||
using boost::adaptors::map_values;
|
||||
|
||||
/* ************************************************************************* */
|
||||
LevenbergMarquardtParams::VerbosityLM LevenbergMarquardtParams::verbosityLMTranslator(const std::string &src) const {
|
||||
std::string s = src; boost::algorithm::to_upper(s);
|
||||
if (s == "SILENT") return LevenbergMarquardtParams::SILENT;
|
||||
if (s == "LAMBDA") return LevenbergMarquardtParams::LAMBDA;
|
||||
if (s == "TRYLAMBDA") return LevenbergMarquardtParams::TRYLAMBDA;
|
||||
if (s == "TRYCONFIG") return LevenbergMarquardtParams::TRYCONFIG;
|
||||
if (s == "TRYDELTA") return LevenbergMarquardtParams::TRYDELTA;
|
||||
if (s == "DAMPED") return LevenbergMarquardtParams::DAMPED;
|
||||
LevenbergMarquardtParams::VerbosityLM LevenbergMarquardtParams::verbosityLMTranslator(
|
||||
const std::string &src) const {
|
||||
std::string s = src;
|
||||
boost::algorithm::to_upper(s);
|
||||
if (s == "SILENT")
|
||||
return LevenbergMarquardtParams::SILENT;
|
||||
if (s == "LAMBDA")
|
||||
return LevenbergMarquardtParams::LAMBDA;
|
||||
if (s == "TRYLAMBDA")
|
||||
return LevenbergMarquardtParams::TRYLAMBDA;
|
||||
if (s == "TRYCONFIG")
|
||||
return LevenbergMarquardtParams::TRYCONFIG;
|
||||
if (s == "TRYDELTA")
|
||||
return LevenbergMarquardtParams::TRYDELTA;
|
||||
if (s == "DAMPED")
|
||||
return LevenbergMarquardtParams::DAMPED;
|
||||
|
||||
/* default is silent */
|
||||
return LevenbergMarquardtParams::SILENT;
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
std::string LevenbergMarquardtParams::verbosityLMTranslator(VerbosityLM value) const {
|
||||
std::string LevenbergMarquardtParams::verbosityLMTranslator(
|
||||
VerbosityLM value) const {
|
||||
std::string s;
|
||||
switch (value) {
|
||||
case LevenbergMarquardtParams::SILENT: s = "SILENT" ; break;
|
||||
case LevenbergMarquardtParams::TERMINATION: s = "TERMINATION" ; break;
|
||||
case LevenbergMarquardtParams::LAMBDA: s = "LAMBDA" ; break;
|
||||
case LevenbergMarquardtParams::TRYLAMBDA: s = "TRYLAMBDA" ; break;
|
||||
case LevenbergMarquardtParams::TRYCONFIG: s = "TRYCONFIG" ; break;
|
||||
case LevenbergMarquardtParams::TRYDELTA: s = "TRYDELTA" ; break;
|
||||
case LevenbergMarquardtParams::DAMPED: s = "DAMPED" ; break;
|
||||
default: s = "UNDEFINED" ; break;
|
||||
case LevenbergMarquardtParams::SILENT:
|
||||
s = "SILENT";
|
||||
break;
|
||||
case LevenbergMarquardtParams::TERMINATION:
|
||||
s = "TERMINATION";
|
||||
break;
|
||||
case LevenbergMarquardtParams::LAMBDA:
|
||||
s = "LAMBDA";
|
||||
break;
|
||||
case LevenbergMarquardtParams::TRYLAMBDA:
|
||||
s = "TRYLAMBDA";
|
||||
break;
|
||||
case LevenbergMarquardtParams::TRYCONFIG:
|
||||
s = "TRYCONFIG";
|
||||
break;
|
||||
case LevenbergMarquardtParams::TRYDELTA:
|
||||
s = "TRYDELTA";
|
||||
break;
|
||||
case LevenbergMarquardtParams::DAMPED:
|
||||
s = "DAMPED";
|
||||
break;
|
||||
default:
|
||||
s = "UNDEFINED";
|
||||
break;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
@ -73,10 +97,12 @@ void LevenbergMarquardtParams::print(const std::string& str) const {
|
|||
std::cout << " lambdaFactor: " << lambdaFactor << "\n";
|
||||
std::cout << " lambdaUpperBound: " << lambdaUpperBound << "\n";
|
||||
std::cout << " lambdaLowerBound: " << lambdaLowerBound << "\n";
|
||||
std::cout << " disableInnerIterations: " << disableInnerIterations << "\n";
|
||||
std::cout << " disableInnerIterations: " << disableInnerIterations
|
||||
<< "\n";
|
||||
std::cout << " minModelFidelity: " << minModelFidelity << "\n";
|
||||
std::cout << " diagonalDamping: " << diagonalDamping << "\n";
|
||||
std::cout << " verbosityLM: " << verbosityLMTranslator(verbosityLM) << "\n";
|
||||
std::cout << " verbosityLM: "
|
||||
<< verbosityLMTranslator(verbosityLM) << "\n";
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
|
@ -86,10 +112,10 @@ GaussianFactorGraph::shared_ptr LevenbergMarquardtOptimizer::linearize() const {
|
|||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
void LevenbergMarquardtOptimizer::increaseLambda(){
|
||||
if(params_.useFixedLambdaFactor_){
|
||||
void LevenbergMarquardtOptimizer::increaseLambda() {
|
||||
if (params_.useFixedLambdaFactor_) {
|
||||
state_.lambda *= params_.lambdaFactor;
|
||||
}else{
|
||||
} else {
|
||||
state_.lambda *= params_.lambdaFactor;
|
||||
params_.lambdaFactor *= 2.0;
|
||||
}
|
||||
|
@ -97,11 +123,11 @@ void LevenbergMarquardtOptimizer::increaseLambda(){
|
|||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
void LevenbergMarquardtOptimizer::decreaseLambda(double stepQuality){
|
||||
void LevenbergMarquardtOptimizer::decreaseLambda(double stepQuality) {
|
||||
|
||||
if(params_.useFixedLambdaFactor_){
|
||||
if (params_.useFixedLambdaFactor_) {
|
||||
state_.lambda /= params_.lambdaFactor;
|
||||
}else{
|
||||
} else {
|
||||
// CHECK_GT(step_quality, 0.0);
|
||||
state_.lambda *= std::max(1.0 / 3.0, 1.0 - pow(2.0 * stepQuality - 1.0, 3));
|
||||
params_.lambdaFactor = 2.0;
|
||||
|
@ -138,21 +164,29 @@ GaussianFactorGraph LevenbergMarquardtOptimizer::buildDampedSystem(
|
|||
double sigma = 1.0 / std::sqrt(state_.lambda);
|
||||
GaussianFactorGraph damped = linear;
|
||||
damped.reserve(damped.size() + state_.values.size());
|
||||
BOOST_FOREACH(const Values::KeyValuePair& key_value, state_.values) {
|
||||
size_t dim = key_value.value.dim();
|
||||
Matrix A = Matrix::Identity(dim, dim);
|
||||
//Replace the identity matrix with diagonal of Hessian
|
||||
if (params_.diagonalDamping){
|
||||
if (params_.diagonalDamping) {
|
||||
BOOST_FOREACH(const VectorValues::KeyValuePair& key_vector, state_.hessianDiagonal) {
|
||||
// Fill in the diagonal of A with diag(hessian)
|
||||
try {
|
||||
A.diagonal() = state_.hessianDiagonal.at(key_value.key);
|
||||
Matrix A = Eigen::DiagonalMatrix<double, Eigen::Dynamic>(state_.hessianDiagonal.at(key_vector.first));
|
||||
size_t dim = key_vector.second.size();
|
||||
Vector b = Vector::Zero(dim);
|
||||
SharedDiagonal model = noiseModel::Isotropic::Sigma(dim, sigma);
|
||||
damped += boost::make_shared<JacobianFactor>(key_vector.first, A, b, model);
|
||||
} catch (std::exception e) {
|
||||
// Don't attempt any damping if no key found in diagonal
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Vector b = Vector::Zero(dim);
|
||||
SharedDiagonal model = noiseModel::Isotropic::Sigma(dim, sigma);
|
||||
damped += boost::make_shared<JacobianFactor>(key_value.key, A, b, model);
|
||||
} else {
|
||||
// Straightforward damping:
|
||||
BOOST_FOREACH(const Values::KeyValuePair& key_value, state_.values) {
|
||||
size_t dim = key_value.value.dim();
|
||||
Matrix A = Matrix::Identity(dim, dim);
|
||||
Vector b = Vector::Zero(dim);
|
||||
SharedDiagonal model = noiseModel::Isotropic::Sigma(dim, sigma);
|
||||
damped += boost::make_shared<JacobianFactor>(key_value.key, A, b, model);
|
||||
}
|
||||
}
|
||||
gttoc(damp);
|
||||
return damped;
|
||||
|
@ -161,20 +195,22 @@ GaussianFactorGraph LevenbergMarquardtOptimizer::buildDampedSystem(
|
|||
/* ************************************************************************* */
|
||||
void LevenbergMarquardtOptimizer::iterate() {
|
||||
|
||||
gttic (LM_iterate);
|
||||
gttic(LM_iterate);
|
||||
|
||||
// Pull out parameters we'll use
|
||||
const NonlinearOptimizerParams::Verbosity nloVerbosity = params_.verbosity;
|
||||
const LevenbergMarquardtParams::VerbosityLM lmVerbosity = params_.verbosityLM;
|
||||
|
||||
// Linearize graph
|
||||
if(lmVerbosity >= LevenbergMarquardtParams::DAMPED) cout << "linearizing = " << endl;
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::DAMPED)
|
||||
cout << "linearizing = " << endl;
|
||||
GaussianFactorGraph::shared_ptr linear = linearize();
|
||||
|
||||
// Keep increasing lambda until we make make progress
|
||||
while (true) {
|
||||
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) cout << "trying lambda = " << state_.lambda << endl;
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA)
|
||||
cout << "trying lambda = " << state_.lambda << endl;
|
||||
|
||||
// Build damped system for this lambda (adds prior factors that make it like gradient descent)
|
||||
GaussianFactorGraph dampedSystem = buildDampedSystem(*linear);
|
||||
|
@ -183,9 +219,11 @@ void LevenbergMarquardtOptimizer::iterate() {
|
|||
if (!params_.logFile.empty()) {
|
||||
ofstream os(params_.logFile.c_str(), ios::app);
|
||||
|
||||
boost::posix_time::ptime currentTime = boost::posix_time::microsec_clock::universal_time();
|
||||
boost::posix_time::ptime currentTime =
|
||||
boost::posix_time::microsec_clock::universal_time();
|
||||
|
||||
os << state_.totalNumberInnerIterations << "," << 1e-6 * (currentTime - state_.startTime).total_microseconds() << ","
|
||||
os << state_.totalNumberInnerIterations << ","
|
||||
<< 1e-6 * (currentTime - state_.startTime).total_microseconds() << ","
|
||||
<< state_.error << "," << state_.lambda << endl;
|
||||
}
|
||||
|
||||
|
@ -203,44 +241,48 @@ void LevenbergMarquardtOptimizer::iterate() {
|
|||
try {
|
||||
delta = solve(dampedSystem, state_.values, params_);
|
||||
systemSolvedSuccessfully = true;
|
||||
} catch(IndeterminantLinearSystemException& e) {
|
||||
} catch (IndeterminantLinearSystemException& e) {
|
||||
systemSolvedSuccessfully = false;
|
||||
}
|
||||
|
||||
if(systemSolvedSuccessfully) {
|
||||
if (systemSolvedSuccessfully) {
|
||||
params_.reuse_diagonal_ = true;
|
||||
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) cout << "linear delta norm = " << delta.norm() << endl;
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYDELTA) delta.print("delta");
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA)
|
||||
cout << "linear delta norm = " << delta.norm() << endl;
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYDELTA)
|
||||
delta.print("delta");
|
||||
|
||||
// cost change in the linearized system (old - new)
|
||||
double newlinearizedError = linear->error(delta);
|
||||
|
||||
double linearizedCostChange = state_.error - newlinearizedError;
|
||||
|
||||
if(linearizedCostChange >= 0){ // step is valid
|
||||
if (linearizedCostChange >= 0) { // step is valid
|
||||
// update values
|
||||
gttic (retract);
|
||||
gttic(retract);
|
||||
newValues = state_.values.retract(delta);
|
||||
gttoc(retract);
|
||||
|
||||
// compute new error
|
||||
gttic (compute_error);
|
||||
if(lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) cout << "calculating error" << endl;
|
||||
gttic(compute_error);
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA)
|
||||
cout << "calculating error" << endl;
|
||||
newError = graph_.error(newValues);
|
||||
gttoc(compute_error);
|
||||
|
||||
// cost change in the original, nonlinear system (old - new)
|
||||
double costChange = state_.error - newError;
|
||||
|
||||
double absolute_function_tolerance = params_.relativeErrorTol * state_.error;
|
||||
double absolute_function_tolerance = params_.relativeErrorTol
|
||||
* state_.error;
|
||||
if (fabs(costChange) >= absolute_function_tolerance) {
|
||||
// fidelity of linearized model VS original system between
|
||||
if(linearizedCostChange > 1e-15){
|
||||
if (linearizedCostChange > 1e-15) {
|
||||
modelFidelity = costChange / linearizedCostChange;
|
||||
// if we decrease the error in the nonlinear system and modelFidelity is above threshold
|
||||
step_is_successful = modelFidelity > params_.minModelFidelity;
|
||||
}else{
|
||||
} else {
|
||||
step_is_successful = true; // linearizedCostChange close to zero
|
||||
}
|
||||
} else {
|
||||
|
@ -249,20 +291,23 @@ void LevenbergMarquardtOptimizer::iterate() {
|
|||
}
|
||||
}
|
||||
|
||||
if(step_is_successful){ // we have successfully decreased the cost and we have good modelFidelity
|
||||
if (step_is_successful) { // we have successfully decreased the cost and we have good modelFidelity
|
||||
state_.values.swap(newValues);
|
||||
state_.error = newError;
|
||||
decreaseLambda(modelFidelity);
|
||||
break;
|
||||
}else if (!stopSearchingLambda){ // we failed to solved the system or we had no decrease in cost
|
||||
} else if (!stopSearchingLambda) { // we failed to solved the system or we had no decrease in cost
|
||||
if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA)
|
||||
cout << "increasing lambda: old error (" << state_.error << ") new error (" << newError << ")" << endl;
|
||||
cout << "increasing lambda: old error (" << state_.error
|
||||
<< ") new error (" << newError << ")" << endl;
|
||||
increaseLambda();
|
||||
|
||||
// check if lambda is too big
|
||||
if(state_.lambda >= params_.lambdaUpperBound) {
|
||||
if(nloVerbosity >= NonlinearOptimizerParams::TERMINATION)
|
||||
cout << "Warning: Levenberg-Marquardt giving up because cannot decrease error with maximum lambda" << endl;
|
||||
if (state_.lambda >= params_.lambdaUpperBound) {
|
||||
if (nloVerbosity >= NonlinearOptimizerParams::TERMINATION)
|
||||
cout
|
||||
<< "Warning: Levenberg-Marquardt giving up because cannot decrease error with maximum lambda"
|
||||
<< endl;
|
||||
break;
|
||||
}
|
||||
} else { // the change in the cost is very small and it is not worth trying bigger lambdas
|
||||
|
@ -276,9 +321,8 @@ void LevenbergMarquardtOptimizer::iterate() {
|
|||
|
||||
/* ************************************************************************* */
|
||||
LevenbergMarquardtParams LevenbergMarquardtOptimizer::ensureHasOrdering(
|
||||
LevenbergMarquardtParams params, const NonlinearFactorGraph& graph) const
|
||||
{
|
||||
if(!params.ordering)
|
||||
LevenbergMarquardtParams params, const NonlinearFactorGraph& graph) const {
|
||||
if (!params.ordering)
|
||||
params.ordering = Ordering::COLAMD(graph);
|
||||
return params;
|
||||
}
|
||||
|
|
|
@ -234,7 +234,7 @@ TEST(NonlinearOptimizer, NullFactor) {
|
|||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
TEST(NonlinearOptimizer, MoreOptimization) {
|
||||
TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) {
|
||||
|
||||
NonlinearFactorGraph fg;
|
||||
fg += PriorFactor<Pose2>(0, Pose2(0, 0, 0),
|
||||
|
@ -284,57 +284,59 @@ TEST(NonlinearOptimizer, MoreOptimization) {
|
|||
// cout << "===================================================================================" << endl;
|
||||
|
||||
// Try LM with diagonal damping
|
||||
Values initBetter = init;
|
||||
// initBetter.insert(0, Pose2(3,4,0));
|
||||
// initBetter.insert(1, Pose2(10,2,M_PI/3));
|
||||
// initBetter.insert(2, Pose2(11,7,M_PI/2));
|
||||
Values initBetter;
|
||||
initBetter.insert(0, Pose2(3,4,0));
|
||||
initBetter.insert(1, Pose2(10,2,M_PI/3));
|
||||
initBetter.insert(2, Pose2(11,7,M_PI/2));
|
||||
|
||||
{
|
||||
// params.setDiagonalDamping(true);
|
||||
// LevenbergMarquardtOptimizer optimizer(fg, initBetter, params);
|
||||
//
|
||||
// // test the diagonal
|
||||
// GaussianFactorGraph::shared_ptr linear = optimizer.linearize();
|
||||
// GaussianFactorGraph damped = optimizer.buildDampedSystem(*linear);
|
||||
// VectorValues d = linear->hessianDiagonal(), //
|
||||
// expectedDiagonal = d + params.lambdaInitial * d;
|
||||
// EXPECT(assert_equal(expectedDiagonal, damped.hessianDiagonal()));
|
||||
//
|
||||
// // test convergence (does not!)
|
||||
// Values actual = optimizer.optimize();
|
||||
// EXPECT(assert_equal(expected, actual));
|
||||
//
|
||||
// // Check that the gradient is zero (it is not!)
|
||||
// linear = optimizer.linearize();
|
||||
// EXPECT(assert_equal(expectedGradient,linear->gradientAtZero()));
|
||||
//
|
||||
// // Check that the gradient is zero for damped system (it is not!)
|
||||
// damped = optimizer.buildDampedSystem(*linear);
|
||||
// VectorValues actualGradient = damped.gradientAtZero();
|
||||
// EXPECT(assert_equal(expectedGradient,actualGradient));
|
||||
//
|
||||
// // Check errors at convergence and errors in direction of gradient (decreases!)
|
||||
// EXPECT_DOUBLES_EQUAL(46.02558,fg.error(actual),1e-5);
|
||||
// EXPECT_DOUBLES_EQUAL(44.742237,fg.error(actual.retract(-0.01*actualGradient)),1e-5);
|
||||
//
|
||||
// // Check that solve yields gradient (it's not equal to gradient, as predicted)
|
||||
// VectorValues delta = damped.optimize();
|
||||
// double factor = actualGradient[0][0]/delta[0][0];
|
||||
// EXPECT(assert_equal(actualGradient,factor*delta));
|
||||
//
|
||||
// // Still pointing downhill wrt actual gradient !
|
||||
// EXPECT_DOUBLES_EQUAL( 0.1056157,dot(-1*actualGradient,delta),1e-3);
|
||||
//
|
||||
// // delta.print("This is the delta value computed by LM with diagonal damping");
|
||||
//
|
||||
// // Still pointing downhill wrt expected gradient (IT DOES NOT! actually they are perpendicular)
|
||||
// EXPECT_DOUBLES_EQUAL( 0.0,dot(-1*expectedGradient,delta),1e-5);
|
||||
//
|
||||
// // Check errors at convergence and errors in direction of solution (does not decrease!)
|
||||
// EXPECT_DOUBLES_EQUAL(46.0254859,fg.error(actual.retract(delta)),1e-5);
|
||||
//
|
||||
// // Check errors at convergence and errors at a small step in direction of solution (does not decrease!)
|
||||
// EXPECT_DOUBLES_EQUAL(46.0255,fg.error(actual.retract(0.01*delta)),1e-3);
|
||||
params.setDiagonalDamping(true);
|
||||
LevenbergMarquardtOptimizer optimizer(fg, initBetter, params);
|
||||
|
||||
// test the diagonal
|
||||
GaussianFactorGraph::shared_ptr linear = optimizer.linearize();
|
||||
GaussianFactorGraph damped = optimizer.buildDampedSystem(*linear);
|
||||
VectorValues d = linear->hessianDiagonal(), //
|
||||
expectedDiagonal = d + params.lambdaInitial * d;
|
||||
EXPECT(assert_equal(expectedDiagonal, damped.hessianDiagonal()));
|
||||
|
||||
// test convergence (does not!)
|
||||
Values actual = optimizer.optimize();
|
||||
EXPECT(assert_equal(expected, actual));
|
||||
|
||||
// Check that the gradient is zero (it is not!)
|
||||
linear = optimizer.linearize();
|
||||
EXPECT(assert_equal(expectedGradient,linear->gradientAtZero()));
|
||||
|
||||
// Check that the gradient is zero for damped system (it is not!)
|
||||
damped = optimizer.buildDampedSystem(*linear);
|
||||
VectorValues actualGradient = damped.gradientAtZero();
|
||||
EXPECT(assert_equal(expectedGradient,actualGradient));
|
||||
|
||||
/* This block was made to test the original initial guess "init"
|
||||
// Check errors at convergence and errors in direction of gradient (decreases!)
|
||||
EXPECT_DOUBLES_EQUAL(46.02558,fg.error(actual),1e-5);
|
||||
EXPECT_DOUBLES_EQUAL(44.742237,fg.error(actual.retract(-0.01*actualGradient)),1e-5);
|
||||
|
||||
// Check that solve yields gradient (it's not equal to gradient, as predicted)
|
||||
VectorValues delta = damped.optimize();
|
||||
double factor = actualGradient[0][0]/delta[0][0];
|
||||
EXPECT(assert_equal(actualGradient,factor*delta));
|
||||
|
||||
// Still pointing downhill wrt actual gradient !
|
||||
EXPECT_DOUBLES_EQUAL( 0.1056157,dot(-1*actualGradient,delta),1e-3);
|
||||
|
||||
// delta.print("This is the delta value computed by LM with diagonal damping");
|
||||
|
||||
// Still pointing downhill wrt expected gradient (IT DOES NOT! actually they are perpendicular)
|
||||
EXPECT_DOUBLES_EQUAL( 0.0,dot(-1*expectedGradient,delta),1e-5);
|
||||
|
||||
// Check errors at convergence and errors in direction of solution (does not decrease!)
|
||||
EXPECT_DOUBLES_EQUAL(46.0254859,fg.error(actual.retract(delta)),1e-5);
|
||||
|
||||
// Check errors at convergence and errors at a small step in direction of solution (does not decrease!)
|
||||
EXPECT_DOUBLES_EQUAL(46.0255,fg.error(actual.retract(0.01*delta)),1e-3);
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue