feat: Add Best Heavy Ball alg to set beta

release/4.3a0
jingwuOUO 2020-09-19 23:28:43 -04:00
parent fc112a4dc7
commit d6e2546cf5
3 changed files with 190 additions and 50 deletions

View File

@@ -30,26 +30,47 @@
#include <utility>
#include <vector>
#include <algorithm>
#include <cmath>
#include <chrono>
#include <random>
namespace gtsam {
using Sparse = Eigen::SparseMatrix<double>;
/* ************************************************************************* */
/* ************************************************************************* */
/// MINIMUM EIGENVALUE COMPUTATIONS
/** This is a lightweight struct used (together with a spectral shift) to
* compute the minimum eigenvalue and eigenvector of a sparse matrix A; its
* core function, perform_op(x1, x0), computes and returns the normalized
* vector y = A * x1 - beta * x0 */
struct PowerFunctor {
/**
* \brief Compute the i-th eigenpair with the (accelerated) power method
*
* References :
* 1) Rosen, D. and Carlone, L., 2017, September. Computational
* enhancements for certifiably correct SLAM. In Proceedings of the
* International Conference on Intelligent Robots and Systems.
* 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How,
* 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, arXiv
* 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, Accelerated
* stochastic power iteration, in Proc. Mach. Learn. Res., no. 84, 2018, pp. 58-67
*
* It performs the following iteration: \f$ x_{k+1} = A x_k - \beta x_{k-1} \f$,
* where A is the certificate matrix and x_k is the k-th Ritz vector
*
*/
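/* Illustrative sketch (not part of the original code): with a fixed momentum
* beta, one accelerated update from the current iterate x1 and the previous
* iterate x0 is simply
*
*   Vector x2 = (A * x1 - beta * x0).normalized();
*
* which is what perform_op(x1, x0) below computes using the stored beta_; the
* plain power method is recovered as the special case beta = 0. */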
// Const reference to an externally-held matrix whose minimum-eigenvalue we
// want to compute
const Sparse &A_;
const Matrix &S_;
const int m_n_; // dimension of Matrix A
const int m_nev_; // number of eigenvalues required
// Flag selecting the algorithm: false runs the plain power method, true runs the accelerated (heavy-ball) power method.
bool accelerated_;
// a Polyak momentum term
double beta_;
@@ -64,41 +85,55 @@ private:
Matrix sorted_ritz_vectors_; // sorted converged eigenvectors
public:
// Constructor
explicit PowerFunctor(const Sparse &A, int nev, int ncv,
double beta = 0)
: A_(A),
m_n_(A.rows()),
m_nev_(nev),
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
beta_(beta),
m_niter_(0)
{}
// Constructor
explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv,
bool accelerated = false, double beta = 0)
: A_(A),
S_(S),
m_n_(A.rows()),
m_nev_(nev),
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
accelerated_(accelerated),
beta_(beta),
m_niter_(0) {
// Do nothing
}
int rows() const { return A_.rows(); }
int cols() const { return A_.cols(); }
// Matrix-vector multiplication operation
Vector perform_op(const Vector& x1, const Vector& x0) const {
// Do the multiplication
Vector x2 = A_ * x1 - beta_ * x0;
x2.normalize();
return x2;
}
Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const {
Vector x2 = A_ * x1 - beta * x0;
x2.normalize();
return x2;
}
Vector perform_op(const Vector& x1) const {
Vector x2 = A_ * x1;
x2.normalize();
return x2;
}
Vector perform_op(int i) const {
// Do the multiplication using wrapped Eigen vectors
if (accelerated_) {
Vector x1 = ritz_vectors_.col(i-1);
Vector x2 = ritz_vectors_.col(i-2);
return perform_op(x1, x2);
} else
return perform_op(ritz_vectors_.col(i-1));
}
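// Clarifying note (illustrative): perform_op(i) produces the vector that
// becomes column i of ritz_vectors_, built from column i-1 (and also column
// i-2 in the accelerated case); the convergence loop later in this diff calls
// it as perform_op(i+1) to fill the next column.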
long next_long_rand(long seed) {
const unsigned int m_a = 16807;
const unsigned long m_max = 2147483647L;
// long m_rand = m_n_ ? (m_n_ & m_max) : 1;
unsigned long lo, hi;
lo = m_a * (long)(seed & 0xFFFF);
@@ -127,6 +162,68 @@ public:
return res;
}
/// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3)
void setBeta() {
if (m_n_ < 10) return;
double maxBeta = beta_;
size_t maxIndex = 2;  // index of the current beta_ in the candidate list below
std::vector<double> betas = {2.0 / 3.0 * maxBeta, 0.99 * maxBeta, maxBeta, 1.01 * maxBeta, 1.5 * maxBeta};
Matrix tmp_ritz_vectors;
tmp_ritz_vectors.resize(m_n_, 10);
tmp_ritz_vectors.setZero();
for (size_t i = 0; i < 10; i++) {
for (size_t k = 0; k < betas.size(); ++k) {
for (size_t j = 1; j < 10; j++) {
// double rayleighQuotient;
if (j < 2) {
Vector x0 = random_vec(m_n_);
Vector x00 = random_vec(m_n_);
tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]);
tmp_ritz_vectors.col(1) =
perform_op(tmp_ritz_vectors.col(0), x0, betas[k]);
}
else {
tmp_ritz_vectors.col(j) =
perform_op(tmp_ritz_vectors.col(j - 1),
tmp_ritz_vectors.col(j - 2), betas[k]);
}
const Vector x = tmp_ritz_vectors.col(j);
const double up = x.transpose() * A_ * x;
const double down = x.dot(x);
const double mu = up / down;
if (mu * mu / 4 > maxBeta) {
maxIndex = k;
maxBeta = mu * mu / 4;
break;
}
}
}
}
beta_ = betas[maxIndex];
}
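// Worked example (illustrative): if the current beta_ is 0.25, the candidate
// list above is {1/6, 0.2475, 0.25, 0.2525, 0.375}; each candidate is scored
// by running a few trial iterations, forming the Rayleigh quotient
// mu = x'Ax / x'x of the latest iterate, and keeping the candidate whose
// trials reach the largest mu^2 / 4.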
void perturb(int i) {
// generate a random perturbation of magnitude 0.03 * ||x_0||_2, as stated in Rosen's paper
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::mt19937 generator (seed);
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
int n = m_n_;
Vector disturb;
disturb.resize(n);
disturb.setZero();
for (int k = 0; k < n; ++k) {
disturb(k) = uniform01(generator);
}
disturb.normalize();
Vector x0 = ritz_vectors_.col(i);
double magnitude = x0.norm();
ritz_vectors_.col(i) = x0 + 0.03 * magnitude * disturb;
}
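// Clarifying note (illustrative): disturb is a random unit direction, so by
// the triangle inequality the perturbed column keeps its norm within 3% of
// ||x_0||.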
void init(const Vector x0, const Vector x00) {
// initialize the Ritz values
ritz_val_.resize(m_n_);
@@ -139,21 +236,36 @@ public:
// initialize the Ritz vectors
ritz_vectors_.resize(m_n_, m_n_);
ritz_vectors_.setZero();
if (accelerated_){
ritz_vectors_.col(0) = perform_op(x0, x00);
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
} else {
ritz_vectors_.col(0) = perform_op(x0);
perturb(0);
}
// setting beta
if (accelerated_) {
Vector init_resid = ritz_vectors_.col(0);
const double up = init_resid.transpose() * A_ * init_resid;
const double down = init_resid.dot(init_resid);
const double mu = up/down;
beta_ = mu*mu/4;
setBeta();
}
}
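// Background note (illustrative, following Ref (3)): for heavy-ball power
// iteration the optimal momentum is lambda_2^2 / 4, where lambda_2 is the
// second-largest eigenvalue of A. Since lambda_2 is unknown, the Rayleigh
// quotient mu of the first Ritz vector is used as a proxy here, giving the
// starting value beta_ = mu^2 / 4, which setBeta() then refines over its
// candidate grid.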
void init() {
Vector x0;
Vector x00;
if (!S_.isZero(0)) {
x0 = S_.row(1);
x00 = S_.row(0);
} else {
x0 = random_vec(m_n_);
x00 = random_vec(m_n_);
}
init(x0, x00);
}
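// Clarifying note (assumption): when a nonzero S is supplied, two of its rows
// seed the iteration, presumably to warm-start the power method from the
// current estimate instead of a purely random vector.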
@@ -165,7 +277,7 @@ public:
ritz_val_(i) = theta;
// update beta
beta_ = std::max(beta_, theta * theta / 4);
if (accelerated_) beta_ = std::max(beta_, theta * theta / 4);
Vector diff = A_ * x - theta * x;
double error = diff.lpNorm<1>();
@@ -179,7 +291,9 @@ public:
if (converged(tol, i)) {
num_converge += 1;
}
if (i>0 && i<ritz_vectors_.cols()-1) {
if (!accelerated_ && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
} else if (accelerated_ && i>0 && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
}
}
@@ -214,6 +328,15 @@ public:
}
}
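// Clarifying note (illustrative): reset() is the restart step; when compute()
// below has not yet converged the requested number of eigenpairs, it reseeds
// the leading Ritz column(s) from the last columns of ritz_vectors_ and the
// iteration continues.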
void reset() {
if (accelerated_) {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1-1), ritz_vectors_.col(m_n_-1-2));
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), ritz_vectors_.col(m_n_-1-1));
} else {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1));
}
}
int compute(int maxit, double tol) {
// Starting
int i = 0;
@@ -222,6 +345,7 @@ public:
m_niter_ +=1;
nconv = num_converged(tol);
if (nconv >= m_nev_) break;
else reset();
}
// sort the result

View File

@@ -480,7 +480,7 @@ static bool PowerMinimumEigenValue(
Eigen::Index numLanczosVectors = 20) {
// a. Compute dominant eigenpair of S using power method
PowerFunctor lmOperator(A, 1, std::min(numLanczosVectors, A.rows()));
PowerFunctor lmOperator(A, S, 1, A.rows());
lmOperator.init();
const int lmConverged = lmOperator.compute(
@@ -503,7 +503,7 @@ static bool PowerMinimumEigenValue(
}
Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
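// Clarifying note (illustrative, not part of the diff): the shift
// C = lmEigenValue * I - A maps the spectrum of A so that
// lambda_max(C) = lmEigenValue - lambda_min(A); once minShiftedOperator below
// converges, the minimum eigenvalue of A can therefore be recovered as
// something like
//   double minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues()(0);
// assuming eigenvalues()(0) holds the dominant eigenvalue of C, as exercised
// in the unit test at the end of this commit.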
PowerFunctor minShiftedOperator(C, 1, std::min(numLanczosVectors, C.rows()));
PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true);
minShiftedOperator.init();
const int minConverged = minShiftedOperator.compute(

View File

@@ -41,21 +41,37 @@ ShonanAveraging3 fromExampleName(
static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o");
/* ************************************************************************* */
TEST(PowerMethod, initialize) {
TEST(PowerMethod, powerIteration) {
// test accelerated power iteration
gtsam::Sparse A(6, 6);
A.coeffRef(0, 0) = 6;
PowerFunctor pf(A, 1, A.rows());
pf.init();
pf.compute(20, 1e-4);
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
Matrix S = Matrix66::Zero();
PowerFunctor apf(A, S, 1, A.rows(), true);
apf.init();
apf.compute(20, 1e-4);
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows());
const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished();
Vector6 actual = pf.eigenvectors().col(0);
actual(0) = abs(actual(0));
EXPECT(assert_equal(x1, actual));
Vector6 actual0 = apf.eigenvectors().col(0);
actual0(0) = abs(actual0(0));
EXPECT(assert_equal(x1, actual0));
const double ev1 = 6.0;
EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5);
// test plain power iteration, where beta is set to 0
PowerFunctor pf(A, S, 1, A.rows());
pf.init();
pf.compute(20, 1e-4);
// for the plain power method, only 5 Ritz vectors converge within 20 iterations
EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
Vector6 actual1 = pf.eigenvectors().col(0);
actual1(0) = abs(actual1(0));
EXPECT(assert_equal(x1, actual1));
EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5);
}