feat: Add Best Heavy Ball alg to set beta

release/4.3a0
jingwuOUO 2020-09-19 23:28:43 -04:00
parent fc112a4dc7
commit d6e2546cf5
3 changed files with 190 additions and 50 deletions

View File

@@ -30,26 +30,47 @@
#include <utility>
#include <vector>
#include <algorithm>
#include <cmath>
#include <chrono>
#include <random>
namespace gtsam {
using Sparse = Eigen::SparseMatrix<double>;
/* ************************************************************************* */
/* ************************************************************************* */
/// MINIMUM EIGENVALUE COMPUTATIONS
/** This is a lightweight struct used (together with a spectral shift) to
* compute the minimum eigenvalue and eigenvector of a sparse matrix A; its
* core function, perform_op(x1, x0), computes and returns the normalized
* vector y = A * x1 - beta * x0 */
struct PowerFunctor {
/**
* \brief Compute the i-th eigenpair with the (accelerated) power method
*
* References :
* 1) Rosen, D. and Carlone, L., 2017, September. Computational
* enhancements for certifiably correct SLAM. In Proceedings of the
* International Conference on Intelligent Robots and Systems.
* 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How,
* 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, arXiv
* 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, Accelerated
* stochastic power iteration, in Proc. Mach. Learn. Res., no. 84, 2018, pp. 58-67
*
* It performs the following iteration: \f$ x_{k+1} = A x_k - \beta x_{k-1} \f$,
* where A is the certificate matrix and x_k is the k-th Ritz vector
*
*/
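/* Illustrative sketch (not part of the original code): with a fixed momentum
* beta, one accelerated update from the current iterate x1 and the previous
* iterate x0 is simply
*
*   Vector x2 = (A * x1 - beta * x0).normalized();
*
* which is what perform_op(x1, x0) below computes using the stored beta_; the
* plain power method is recovered as the special case beta = 0. */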
// Const reference to an externally-held matrix whose minimum-eigenvalue we
// want to compute
const Sparse &A_;
const Matrix &S_;
const int m_n_; // dimension of Matrix A
const int m_nev_; // number of eigenvalues required
// Flag selecting the algorithm: false runs the plain power method, true runs the accelerated (heavy-ball) power method.
bool accelerated_;
// a Polyak momentum term
double beta_;
@@ -64,41 +85,55 @@ private:
Matrix sorted_ritz_vectors_; // sorted converged eigenvectors
public:
// Constructor
explicit PowerFunctor(const Sparse &A, int nev, int ncv,
double beta = 0)
: A_(A),
m_n_(A.rows()),
m_nev_(nev),
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
beta_(beta),
m_niter_(0)
{}
// Constructor
explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv,
bool accelerated = false, double beta = 0)
: A_(A),
S_(S),
m_n_(A.rows()),
m_nev_(nev),
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
accelerated_(accelerated),
beta_(beta),
m_niter_(0) {
// Do nothing
}
int rows() const { return A_.rows(); }
int cols() const { return A_.cols(); }
// Matrix-vector multiplication operation
Vector perform_op(const Vector& x1, const Vector& x0) const {
// Do the multiplication
Vector x2 = A_ * x1 - beta_ * x0;
x2.normalize();
return x2;
}
Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const {
Vector x2 = A_ * x1 - beta * x0;
x2.normalize();
return x2;
}
Vector perform_op(const Vector& x1) const {
Vector x2 = A_ * x1;
x2.normalize();
return x2;
}
Vector perform_op(int i) const {
// Do the multiplication using wrapped Eigen vectors
if (accelerated_) {
Vector x1 = ritz_vectors_.col(i-1);
Vector x2 = ritz_vectors_.col(i-2);
return perform_op(x1, x2);
} else
return perform_op(ritz_vectors_.col(i-1));
}
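// Clarifying note (illustrative): perform_op(i) produces the vector that
// becomes column i of ritz_vectors_, built from column i-1 (and also column
// i-2 in the accelerated case); the convergence loop later in this diff calls
// it as perform_op(i+1) to fill the next column.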
long next_long_rand(long seed) {
const unsigned int m_a = 16807;
const unsigned long m_max = 2147483647L;
// long m_rand = m_n_ ? (m_n_ & m_max) : 1;
unsigned long lo, hi;
lo = m_a * (long)(seed & 0xFFFF);
@@ -127,6 +162,68 @@ public:
return res;
}
/// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3)
void setBeta() {
if (m_n_ < 10) return;
double maxBeta = beta_;
size_t maxIndex = 2;  // index of the current beta_ in the candidate list below
std::vector<double> betas = {2.0 / 3.0 * maxBeta, 0.99 * maxBeta, maxBeta, 1.01 * maxBeta, 1.5 * maxBeta};
Matrix tmp_ritz_vectors;
tmp_ritz_vectors.resize(m_n_, 10);
tmp_ritz_vectors.setZero();
for (size_t i = 0; i < 10; i++) {
for (size_t k = 0; k < betas.size(); ++k) {
for (size_t j = 1; j < 10; j++) {
// double rayleighQuotient;
if (j < 2) {
Vector x0 = random_vec(m_n_);
Vector x00 = random_vec(m_n_);
tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]);
tmp_ritz_vectors.col(1) =
perform_op(tmp_ritz_vectors.col(0), x0, betas[k]);
}
else {
tmp_ritz_vectors.col(j) =
perform_op(tmp_ritz_vectors.col(j - 1),
tmp_ritz_vectors.col(j - 2), betas[k]);
}
const Vector x = tmp_ritz_vectors.col(j);
const double up = x.transpose() * A_ * x;
const double down = x.dot(x);
const double mu = up / down;
if (mu * mu / 4 > maxBeta) {
maxIndex = k;
maxBeta = mu * mu / 4;
break;
}
}
}
}
beta_ = betas[maxIndex];
}
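// Worked example (illustrative): if the current beta_ is 0.25, the candidate
// list above is {1/6, 0.2475, 0.25, 0.2525, 0.375}; each candidate is scored
// by running a few trial iterations, forming the Rayleigh quotient
// mu = x'Ax / x'x of the latest iterate, and keeping the candidate whose
// trials reach the largest mu^2 / 4.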
void perturb(int i) {
// generate a random perturbation of magnitude 0.03 * ||x_0||_2, as stated in Rosen's paper
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::mt19937 generator (seed);
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
int n = m_n_;
Vector disturb;
disturb.resize(n);
disturb.setZero();
for (int k = 0; k < n; ++k) {
disturb(k) = uniform01(generator);
}
disturb.normalize();
Vector x0 = ritz_vectors_.col(i);
double magnitude = x0.norm();
ritz_vectors_.col(i) = x0 + 0.03 * magnitude * disturb;
}
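// Clarifying note (illustrative): disturb is a random unit direction, so by
// the triangle inequality the perturbed column keeps its norm within 3% of
// ||x_0||.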
void init(const Vector x0, const Vector x00) {
// initialize the Ritz values
ritz_val_.resize(m_n_);
@@ -139,21 +236,36 @@ public:
// initialize the Ritz vectors
ritz_vectors_.resize(m_n_, m_n_);
ritz_vectors_.setZero();
if (accelerated_){
ritz_vectors_.col(0) = perform_op(x0, x00);
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
} else {
ritz_vectors_.col(0) = perform_op(x0);
perturb(0);
}
// setting beta
if (accelerated_) {
Vector init_resid = ritz_vectors_.col(0);
const double up = init_resid.transpose() * A_ * init_resid;
const double down = init_resid.dot(init_resid);
const double mu = up/down;
beta_ = mu*mu/4;
setBeta();
}
}
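// Background note (illustrative, following Ref (3)): for heavy-ball power
// iteration the optimal momentum is lambda_2^2 / 4, where lambda_2 is the
// second-largest eigenvalue of A. Since lambda_2 is unknown, the Rayleigh
// quotient mu of the first Ritz vector is used as a proxy here, giving the
// starting value beta_ = mu^2 / 4, which setBeta() then refines over its
// candidate grid.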
void init() {
Vector x0;
Vector x00;
if (!S_.isZero(0)) {
x0 = S_.row(1);
x00 = S_.row(0);
} else {
x0 = random_vec(m_n_);
x00 = random_vec(m_n_);
}
init(x0, x00);
}
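// Clarifying note (assumption): when a nonzero S is supplied, two of its rows
// seed the iteration, presumably to warm-start the power method from the
// current estimate instead of a purely random vector.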
@@ -165,7 +277,7 @@ public:
ritz_val_(i) = theta;
// update beta
beta_ = std::max(beta_, theta * theta / 4);
if (accelerated_) beta_ = std::max(beta_, theta * theta / 4);
Vector diff = A_ * x - theta * x;
double error = diff.lpNorm<1>();
@@ -179,7 +291,9 @@ public:
if (converged(tol, i)) {
num_converge += 1;
}
if (i>0 && i<ritz_vectors_.cols()-1) {
if (!accelerated_ && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
} else if (accelerated_ && i>0 && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
}
}
@@ -214,6 +328,15 @@ public:
}
}
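// Clarifying note (illustrative): reset() is the restart step; when compute()
// below has not yet converged the requested number of eigenpairs, it reseeds
// the leading Ritz column(s) from the last columns of ritz_vectors_ and the
// iteration continues.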
void reset() {
if (accelerated_) {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1-1), ritz_vectors_.col(m_n_-1-2));
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), ritz_vectors_.col(m_n_-1-1));
} else {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1));
}
}
int compute(int maxit, double tol) {
// Starting
int i = 0;
@@ -222,6 +345,7 @@ public:
m_niter_ +=1;
nconv = num_converged(tol);
if (nconv >= m_nev_) break;
else reset();
}
// sort the result

View File

@@ -480,7 +480,7 @@ static bool PowerMinimumEigenValue(
Eigen::Index numLanczosVectors = 20) {
// a. Compute dominant eigenpair of S using power method
PowerFunctor lmOperator(A, 1, std::min(numLanczosVectors, A.rows()));
PowerFunctor lmOperator(A, S, 1, A.rows());
lmOperator.init();
const int lmConverged = lmOperator.compute(
@@ -503,7 +503,7 @@ static bool PowerMinimumEigenValue(
}
Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
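// Clarifying note (illustrative, not part of the diff): the shift
// C = lmEigenValue * I - A maps the spectrum of A so that
// lambda_max(C) = lmEigenValue - lambda_min(A); once minShiftedOperator below
// converges, the minimum eigenvalue of A can therefore be recovered as
// something like
//   double minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues()(0);
// assuming eigenvalues()(0) holds the dominant eigenvalue of C, as exercised
// in the unit test at the end of this commit.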
PowerFunctor minShiftedOperator(C, 1, std::min(numLanczosVectors, C.rows()));
PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true);
minShiftedOperator.init();
const int minConverged = minShiftedOperator.compute(

View File

@@ -41,21 +41,37 @@ ShonanAveraging3 fromExampleName(
static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o");
/* ************************************************************************* */
TEST(PowerMethod, initialize) {
TEST(PowerMethod, powerIteration) {
// test accelerated power iteration
gtsam::Sparse A(6, 6);
A.coeffRef(0, 0) = 6;
PowerFunctor pf(A, 1, A.rows());
pf.init();
pf.compute(20, 1e-4);
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
Matrix S = Matrix66::Zero();
PowerFunctor apf(A, S, 1, A.rows(), true);
apf.init();
apf.compute(20, 1e-4);
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows());
const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished();
Vector6 actual = pf.eigenvectors().col(0);
actual(0) = abs(actual(0));
EXPECT(assert_equal(x1, actual));
Vector6 actual0 = apf.eigenvectors().col(0);
actual0(0) = abs(actual0(0));
EXPECT(assert_equal(x1, actual0));
const double ev1 = 6.0;
EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5);
// test plain power iteration, where beta is set to 0
PowerFunctor pf(A, S, 1, A.rows());
pf.init();
pf.compute(20, 1e-4);
// for the plain power method, only 5 Ritz vectors converge within 20 iterations
EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
Vector6 actual1 = pf.eigenvectors().col(0);
actual1(0) = abs(actual1(0));
EXPECT(assert_equal(x1, actual1));
EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5);
}