feat: Add Best Heavy Ball alg to set beta
parent
fc112a4dc7
commit
d6e2546cf5
|
|
@ -30,26 +30,47 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <chrono>
|
||||
#include <random>
|
||||
|
||||
namespace gtsam {
|
||||
|
||||
using Sparse = Eigen::SparseMatrix<double>;
|
||||
|
||||
/* ************************************************************************* */
|
||||
/* ************************************************************************* */
|
||||
/// MINIMUM EIGENVALUE COMPUTATIONS
|
||||
|
||||
/** This is a lightweight struct used in conjunction with Spectra to compute
|
||||
* the minimum eigenvalue and eigenvector of a sparse matrix A; it has a single
|
||||
* nontrivial function, perform_op(x,y), that computes and returns the product
|
||||
* y = (A + sigma*I) x */
|
||||
struct PowerFunctor {
|
||||
/**
|
||||
* \brief Computer i-th Eigenpair with power method
|
||||
*
|
||||
* References :
|
||||
* 1) Rosen, D. and Carlone, L., 2017, September. Computational
|
||||
* enhancements for certifiably correct SLAM. In Proceedings of the
|
||||
* International Conference on Intelligent Robots and Systems.
|
||||
* 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How,
|
||||
* 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, Arxiv
|
||||
* 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, “Accelerated
|
||||
* stochastic power iteration,” in Proc. Mach. Learn. Res., no. 84, 2018, pp. 58–67
|
||||
*
|
||||
* It performs the following iteration: \f$ x_{k+1} = A * x_k + \beta *
|
||||
* x_{k-1} \f$ where A is the certificate matrix, x is the ritz vector
|
||||
*
|
||||
*/
|
||||
|
||||
// Const reference to an externally-held matrix whose minimum-eigenvalue we
|
||||
// want to compute
|
||||
const Sparse &A_;
|
||||
|
||||
const Matrix &S_;
|
||||
|
||||
const int m_n_; // dimension of Matrix A
|
||||
const int m_nev_; // number of eigenvalues required
|
||||
|
||||
// flag for running power method or accelerated power method. If false, the former, vice versa.
|
||||
bool accelerated_;
|
||||
|
||||
// a Polyak momentum term
|
||||
double beta_;
|
||||
|
||||
|
|
@ -64,41 +85,55 @@ private:
|
|||
Matrix sorted_ritz_vectors_; // sorted converged eigenvectors
|
||||
|
||||
public:
|
||||
// Constructor
|
||||
explicit PowerFunctor(const Sparse &A, int nev, int ncv,
|
||||
double beta = 0)
|
||||
: A_(A),
|
||||
m_n_(A.rows()),
|
||||
m_nev_(nev),
|
||||
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
|
||||
beta_(beta),
|
||||
m_niter_(0)
|
||||
{}
|
||||
// Constructor
|
||||
explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv,
|
||||
bool accelerated = false, double beta = 0)
|
||||
: A_(A),
|
||||
S_(S),
|
||||
m_n_(A.rows()),
|
||||
m_nev_(nev),
|
||||
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
|
||||
accelerated_(accelerated),
|
||||
beta_(beta),
|
||||
m_niter_(0) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
int rows() const { return A_.rows(); }
|
||||
int cols() const { return A_.cols(); }
|
||||
int rows() const { return A_.rows(); }
|
||||
int cols() const { return A_.cols(); }
|
||||
|
||||
// Matrix-vector multiplication operation
|
||||
Vector perform_op(const Vector& x1, const Vector& x0) const {
|
||||
// Matrix-vector multiplication operation
|
||||
Vector perform_op(const Vector& x1, const Vector& x0) const {
|
||||
// Do the multiplication
|
||||
Vector x2 = A_ * x1 - beta_ * x0;
|
||||
x2.normalize();
|
||||
return x2;
|
||||
}
|
||||
|
||||
// Do the multiplication using wrapped Eigen vectors
|
||||
Vector x2 = A_ * x1 - beta_ * x0;
|
||||
Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const {
|
||||
Vector x2 = A_ * x1 - beta * x0;
|
||||
x2.normalize();
|
||||
return x2;
|
||||
}
|
||||
|
||||
Vector perform_op(const Vector& x1) const {
|
||||
Vector x2 = A_ * x1;
|
||||
x2.normalize();
|
||||
return x2;
|
||||
}
|
||||
|
||||
Vector perform_op(int i) const {
|
||||
|
||||
// Do the multiplication using wrapped Eigen vectors
|
||||
Vector x1 = ritz_vectors_.col(i-1);
|
||||
Vector x2 = ritz_vectors_.col(i-2);
|
||||
return perform_op(x1, x2);
|
||||
if (accelerated_) {
|
||||
Vector x1 = ritz_vectors_.col(i-1);
|
||||
Vector x2 = ritz_vectors_.col(i-2);
|
||||
return perform_op(x1, x2);
|
||||
} else
|
||||
return perform_op(ritz_vectors_.col(i-1));
|
||||
}
|
||||
|
||||
long next_long_rand(long seed) {
|
||||
const unsigned int m_a = 16807;
|
||||
const unsigned long m_max = 2147483647L;
|
||||
// long m_rand = m_n_ ? (m_n_ & m_max) : 1;
|
||||
unsigned long lo, hi;
|
||||
|
||||
lo = m_a * (long)(seed & 0xFFFF);
|
||||
|
|
@ -127,6 +162,68 @@ public:
|
|||
return res;
|
||||
}
|
||||
|
||||
/// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3)
|
||||
void setBeta() {
|
||||
if (m_n_ < 10) return;
|
||||
double maxBeta = beta_;
|
||||
size_t maxIndex;
|
||||
std::vector<double> betas = {2/3*maxBeta, 0.99*maxBeta, maxBeta, 1.01*maxBeta, 1.5*maxBeta};
|
||||
|
||||
Matrix tmp_ritz_vectors;
|
||||
tmp_ritz_vectors.resize(m_n_, 10);
|
||||
tmp_ritz_vectors.setZero();
|
||||
for (size_t i = 0; i < 10; i++) {
|
||||
for (size_t k = 0; k < betas.size(); ++k) {
|
||||
for (size_t j = 1; j < 10; j++) {
|
||||
// double rayleighQuotient;
|
||||
if (j <2 ) {
|
||||
Vector x0 = random_vec(m_n_);
|
||||
Vector x00 = random_vec(m_n_);
|
||||
tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]);
|
||||
tmp_ritz_vectors.col(1) =
|
||||
perform_op(tmp_ritz_vectors.col(0), x0, betas[k]);
|
||||
}
|
||||
else {
|
||||
tmp_ritz_vectors.col(j) =
|
||||
perform_op(tmp_ritz_vectors.col(j - 1),
|
||||
tmp_ritz_vectors.col(j - 2), betas[k]);
|
||||
}
|
||||
const Vector x = tmp_ritz_vectors.col(j);
|
||||
const double up = x.transpose() * A_ * x;
|
||||
const double down = x.transpose().dot(x);
|
||||
const double mu = up / down;
|
||||
if (mu * mu / 4 > maxBeta) {
|
||||
maxIndex = k;
|
||||
maxBeta = mu * mu / 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
beta_ = betas[maxIndex];
|
||||
}
|
||||
|
||||
void perturb(int i) {
|
||||
// generate a 0.03*||x_0||_2 as stated in David's paper
|
||||
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
|
||||
std::mt19937 generator (seed);
|
||||
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
|
||||
|
||||
int n = m_n_;
|
||||
Vector disturb;
|
||||
disturb.resize(n);
|
||||
disturb.setZero();
|
||||
for (int i =0; i<n; ++i) {
|
||||
disturb(i) = uniform01(generator);
|
||||
}
|
||||
disturb.normalize();
|
||||
|
||||
Vector x0 = ritz_vectors_.col(i);
|
||||
double magnitude = x0.norm();
|
||||
ritz_vectors_.col(i) = x0 + 0.03 * magnitude * disturb;
|
||||
}
|
||||
|
||||
void init(const Vector x0, const Vector x00) {
|
||||
// initialzie ritz eigen values
|
||||
ritz_val_.resize(m_n_);
|
||||
|
|
@ -139,21 +236,36 @@ public:
|
|||
// initialzie ritz eigen vectors
|
||||
ritz_vectors_.resize(m_n_, m_n_);
|
||||
ritz_vectors_.setZero();
|
||||
ritz_vectors_.col(0) = perform_op(x0, x00);
|
||||
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
|
||||
if (accelerated_){
|
||||
ritz_vectors_.col(0) = perform_op(x0, x00);
|
||||
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
|
||||
} else {
|
||||
ritz_vectors_.col(0) = perform_op(x0);
|
||||
perturb(0);
|
||||
}
|
||||
|
||||
// setting beta
|
||||
Vector init_resid = ritz_vectors_.col(0);
|
||||
const double up = init_resid.transpose() * A_ * init_resid;
|
||||
const double down = init_resid.transpose().dot(init_resid);
|
||||
const double mu = up/down;
|
||||
beta_ = mu*mu/4;
|
||||
|
||||
if (accelerated_) {
|
||||
Vector init_resid = ritz_vectors_.col(0);
|
||||
const double up = init_resid.transpose() * A_ * init_resid;
|
||||
const double down = init_resid.transpose().dot(init_resid);
|
||||
const double mu = up/down;
|
||||
beta_ = mu*mu/4;
|
||||
setBeta();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void init() {
|
||||
Vector x0 = random_vec(m_n_);
|
||||
Vector x00 = random_vec(m_n_);
|
||||
Vector x0;
|
||||
Vector x00;
|
||||
if (!S_.isZero(0)) {
|
||||
x0 = S_.row(1);
|
||||
x00 = S_.row(0);
|
||||
} else {
|
||||
x0 = random_vec(m_n_);
|
||||
x00 = random_vec(m_n_);
|
||||
}
|
||||
init(x0, x00);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +277,7 @@ public:
|
|||
ritz_val_(i) = theta;
|
||||
|
||||
// update beta
|
||||
beta_ = std::max(beta_, theta * theta / 4);
|
||||
if (accelerated_) beta_ = std::max(beta_, theta * theta / 4);
|
||||
|
||||
Vector diff = A_ * x - theta * x;
|
||||
double error = diff.lpNorm<1>();
|
||||
|
|
@ -179,7 +291,9 @@ public:
|
|||
if (converged(tol, i)) {
|
||||
num_converge += 1;
|
||||
}
|
||||
if (i>0 && i<ritz_vectors_.cols()-1) {
|
||||
if (!accelerated_ && i<ritz_vectors_.cols()-1) {
|
||||
ritz_vectors_.col(i+1) = perform_op(i+1);
|
||||
} else if (accelerated_ && i>0 && i<ritz_vectors_.cols()-1) {
|
||||
ritz_vectors_.col(i+1) = perform_op(i+1);
|
||||
}
|
||||
}
|
||||
|
|
@ -214,6 +328,15 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
if (accelerated_) {
|
||||
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1-1), ritz_vectors_.col(m_n_-1-2));
|
||||
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), ritz_vectors_.col(m_n_-1-1));
|
||||
} else {
|
||||
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1));
|
||||
}
|
||||
}
|
||||
|
||||
int compute(int maxit, double tol) {
|
||||
// Starting
|
||||
int i = 0;
|
||||
|
|
@ -222,6 +345,7 @@ public:
|
|||
m_niter_ +=1;
|
||||
nconv = num_converged(tol);
|
||||
if (nconv >= m_nev_) break;
|
||||
else reset();
|
||||
}
|
||||
|
||||
// sort the result
|
||||
|
|
|
|||
|
|
@ -480,7 +480,7 @@ static bool PowerMinimumEigenValue(
|
|||
Eigen::Index numLanczosVectors = 20) {
|
||||
|
||||
// a. Compute dominant eigenpair of S using power method
|
||||
PowerFunctor lmOperator(A, 1, std::min(numLanczosVectors, A.rows()));
|
||||
PowerFunctor lmOperator(A, S, 1, A.rows());
|
||||
lmOperator.init();
|
||||
|
||||
const int lmConverged = lmOperator.compute(
|
||||
|
|
@ -503,7 +503,7 @@ static bool PowerMinimumEigenValue(
|
|||
}
|
||||
|
||||
Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
|
||||
PowerFunctor minShiftedOperator(C, 1, std::min(numLanczosVectors, C.rows()));
|
||||
PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true);
|
||||
minShiftedOperator.init();
|
||||
|
||||
const int minConverged = minShiftedOperator.compute(
|
||||
|
|
|
|||
|
|
@ -41,21 +41,37 @@ ShonanAveraging3 fromExampleName(
|
|||
static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o");
|
||||
|
||||
/* ************************************************************************* */
|
||||
TEST(PowerMethod, initialize) {
|
||||
TEST(PowerMethod, powerIteration) {
|
||||
// test power accelerated iteration
|
||||
gtsam::Sparse A(6, 6);
|
||||
A.coeffRef(0, 0) = 6;
|
||||
PowerFunctor pf(A, 1, A.rows());
|
||||
pf.init();
|
||||
pf.compute(20, 1e-4);
|
||||
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().cols());
|
||||
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
|
||||
Matrix S = Matrix66::Zero();
|
||||
PowerFunctor apf(A, S, 1, A.rows(), true);
|
||||
apf.init();
|
||||
apf.compute(20, 1e-4);
|
||||
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols());
|
||||
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows());
|
||||
|
||||
const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished();
|
||||
Vector6 actual = pf.eigenvectors().col(0);
|
||||
actual(0) = abs(actual(0));
|
||||
EXPECT(assert_equal(x1, actual));
|
||||
Vector6 actual0 = apf.eigenvectors().col(0);
|
||||
actual0(0) = abs(actual0(0));
|
||||
EXPECT(assert_equal(x1, actual0));
|
||||
|
||||
const double ev1 = 6.0;
|
||||
EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5);
|
||||
|
||||
// test power iteration, beta is set to 0
|
||||
PowerFunctor pf(A, S, 1, A.rows());
|
||||
pf.init();
|
||||
pf.compute(20, 1e-4);
|
||||
// for power method, only 5 ritz vectors converge with 20 iteration
|
||||
EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols());
|
||||
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
|
||||
|
||||
Vector6 actual1 = apf.eigenvectors().col(0);
|
||||
actual1(0) = abs(actual1(0));
|
||||
EXPECT(assert_equal(x1, actual1));
|
||||
|
||||
EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue