From d6e2546cf546d9863a5c2d1c1ffa48036f2459ff Mon Sep 17 00:00:00 2001
From: jingwuOUO <wujing2951@gmail.com>
Date: Sat, 19 Sep 2020 23:28:43 -0400
Subject: [PATCH] feat: Add  Best Heavy Ball alg  to set beta

---
 gtsam/sfm/PowerMethod.h             | 202 ++++++++++++++++++++++------
 gtsam/sfm/ShonanAveraging.cpp       |   4 +-
 gtsam/sfm/tests/testPowerMethod.cpp |  34 +++--
 3 files changed, 190 insertions(+), 50 deletions(-)
diff --git a/gtsam/sfm/PowerMethod.h b/gtsam/sfm/PowerMethod.h
index 3b83fb8a4..b8471e8e2 100644
--- a/gtsam/sfm/PowerMethod.h
+++ b/gtsam/sfm/PowerMethod.h
@@ -30,26 +30,47 @@
 #include <utility>
 #include <vector>
 #include <algorithm>
+#include <cmath>
+#include <chrono>
+#include <random>
 
 namespace gtsam {
 
 using Sparse = Eigen::SparseMatrix<double>;
 
-  /* ************************************************************************* */
+/* ************************************************************************* */
 /// MINIMUM EIGENVALUE COMPUTATIONS
 
-/** This is a lightweight struct used in conjunction with Spectra to compute
- * the minimum eigenvalue and eigenvector of a sparse matrix A; it has a single
- * nontrivial function, perform_op(x,y), that computes and returns the product
- * y = (A + sigma*I) x */
 struct PowerFunctor {
+  /**
+   * \brief Computer i-th Eigenpair with power method
+   *
+   * References :
+   * 1) Rosen, D. and Carlone, L., 2017, September. Computational
+   * enhancements for certifiably correct SLAM. In Proceedings of the
+   * International Conference on Intelligent Robots and Systems.
+   * 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How,
+   * 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, Arxiv
+   * 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, “Accelerated
+   * stochastic power iteration,” in Proc. Mach. Learn. Res., no. 84, 2018, pp. 58–67
+   *
+   * It performs the following iteration: \f$ x_{k+1} = A * x_k + \beta *
+   * x_{k-1} \f$ where A is the certificate matrix, x is the ritz vector
+   *
+   */
+
   // Const reference to an externally-held matrix whose minimum-eigenvalue we
   // want to compute
   const Sparse &A_;
 
+  const Matrix &S_;
+
   const int m_n_; // dimension of Matrix A
   const int m_nev_; // number of eigenvalues required
 
+  // flag for running power method or accelerated power method. If false, the former, vice versa.
+  bool accelerated_;
+
   // a Polyak momentum term
   double beta_;
 
@@ -64,41 +85,55 @@ private:
   Matrix sorted_ritz_vectors_; // sorted converged eigenvectors
 
 public:
-  // Constructor
-  explicit PowerFunctor(const Sparse &A, int nev, int ncv, 
-                             double beta = 0)
-      : A_(A),
-        m_n_(A.rows()),
-        m_nev_(nev),
-        // m_ncv_(ncv > m_n_ ? m_n_ : ncv),
-        beta_(beta),
-        m_niter_(0)
-         {}
+ // Constructor
+ explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv,
+                       bool accelerated = false, double beta = 0)
+     : A_(A),
+       S_(S),
+       m_n_(A.rows()),
+       m_nev_(nev),
+       // m_ncv_(ncv > m_n_ ? m_n_ : ncv),
+       accelerated_(accelerated),
+       beta_(beta),
+       m_niter_(0) {
+         // Do nothing
+       }
 
-  int rows() const { return A_.rows(); }
-  int cols() const { return A_.cols(); }
+ int rows() const { return A_.rows(); }
+ int cols() const { return A_.cols(); }
 
-  // Matrix-vector multiplication operation
-  Vector perform_op(const Vector& x1, const Vector& x0) const {
+ // Matrix-vector multiplication operation
+ Vector perform_op(const Vector& x1, const Vector& x0) const {
+   // Do the multiplication
+   Vector x2 = A_ * x1 - beta_ * x0;
+   x2.normalize();
+   return x2;
+  }
 
-    // Do the multiplication using wrapped Eigen vectors
-    Vector x2 = A_ * x1 - beta_ * x0;
+  Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const {
+    Vector x2 = A_ * x1 - beta * x0;
+    x2.normalize();
+    return x2;
+  }
+
+  Vector perform_op(const Vector& x1) const {
+    Vector x2 = A_ * x1;
     x2.normalize();
     return x2;
   }
 
   Vector perform_op(int i) const {
-
-    // Do the multiplication using wrapped Eigen vectors
-    Vector x1 = ritz_vectors_.col(i-1);
-    Vector x2 = ritz_vectors_.col(i-2);
-    return perform_op(x1, x2);
+    if (accelerated_) {
+      Vector x1 = ritz_vectors_.col(i-1);
+      Vector x2 = ritz_vectors_.col(i-2);
+      return perform_op(x1, x2);
+    } else
+      return perform_op(ritz_vectors_.col(i-1));
   }
 
   long next_long_rand(long seed) {
     const unsigned int m_a = 16807;
     const unsigned long m_max = 2147483647L;
-    // long m_rand = m_n_ ? (m_n_ & m_max) : 1;
     unsigned long lo, hi;
 
     lo = m_a * (long)(seed & 0xFFFF);
@@ -127,6 +162,68 @@ public:
     return res;
   }
 
+  /// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3)
+  void setBeta() {
+    if (m_n_ < 10) return;
+    double maxBeta = beta_;
+    size_t maxIndex;
+    std::vector<double> betas = {2/3*maxBeta, 0.99*maxBeta, maxBeta, 1.01*maxBeta, 1.5*maxBeta};
+
+    Matrix tmp_ritz_vectors;
+    tmp_ritz_vectors.resize(m_n_, 10);
+    tmp_ritz_vectors.setZero();
+    for (size_t i = 0; i < 10; i++) {
+      for (size_t k = 0; k < betas.size(); ++k) {
+        for (size_t j = 1; j < 10; j++) {
+          // double rayleighQuotient;
+          if (j <2 ) {
+            Vector x0 = random_vec(m_n_);
+            Vector x00 = random_vec(m_n_);
+            tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]);
+            tmp_ritz_vectors.col(1) =
+                perform_op(tmp_ritz_vectors.col(0), x0, betas[k]);
+          }
+          else {
+            tmp_ritz_vectors.col(j) =
+                perform_op(tmp_ritz_vectors.col(j - 1),
+                           tmp_ritz_vectors.col(j - 2), betas[k]);
+          }
+          const Vector x = tmp_ritz_vectors.col(j);
+          const double up = x.transpose() * A_ * x;
+          const double down = x.transpose().dot(x);
+          const double mu = up / down;
+          if (mu * mu / 4 > maxBeta) {
+            maxIndex = k;
+            maxBeta = mu * mu / 4;
+            break;
+          }
+        }
+      }
+    }
+
+    beta_ = betas[maxIndex];
+  }
+
+  void perturb(int i) {
+    // generate a 0.03*||x_0||_2 as stated in David's paper
+    unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+    std::mt19937 generator (seed);
+    std::uniform_real_distribution<double> uniform01(0.0, 1.0);
+
+    int n = m_n_;
+    Vector disturb;
+    disturb.resize(n);
+    disturb.setZero();
+    for (int i =0; i<n; ++i) {
+      disturb(i) = uniform01(generator);
+    }
+    disturb.normalize();
+
+    Vector x0 = ritz_vectors_.col(i);
+    double magnitude = x0.norm();
+    ritz_vectors_.col(i) = x0 + 0.03 * magnitude * disturb;
+  }
+
   void init(const Vector x0, const Vector x00) {
     // initialzie ritz eigen values
     ritz_val_.resize(m_n_);
@@ -139,21 +236,36 @@ public:
     // initialzie ritz eigen vectors
     ritz_vectors_.resize(m_n_, m_n_);
     ritz_vectors_.setZero();
-    ritz_vectors_.col(0) = perform_op(x0, x00);
-    ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
+    if (accelerated_){
+      ritz_vectors_.col(0) = perform_op(x0, x00);
+      ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
+    } else {
+      ritz_vectors_.col(0) = perform_op(x0);
+      perturb(0);
+    }
 
     // setting beta
-    Vector init_resid = ritz_vectors_.col(0);
-    const double up = init_resid.transpose() * A_ * init_resid;
-    const double down = init_resid.transpose().dot(init_resid);
-    const double mu =  up/down;
-    beta_ = mu*mu/4;
-
+    if (accelerated_) {
+      Vector init_resid = ritz_vectors_.col(0);
+      const double up = init_resid.transpose() * A_ * init_resid;
+      const double down = init_resid.transpose().dot(init_resid);
+      const double mu =  up/down;
+      beta_ = mu*mu/4;
+      setBeta();
+    }
+    
   }
 
   void init() {
-    Vector x0 = random_vec(m_n_);
-    Vector x00 = random_vec(m_n_);
+    Vector x0;
+    Vector x00;
+    if (!S_.isZero(0)) {
+      x0 = S_.row(1);
+      x00 = S_.row(0);
+    } else {
+      x0 = random_vec(m_n_);
+      x00 = random_vec(m_n_);
+    }
     init(x0, x00);
   }
 
@@ -165,7 +277,7 @@ public:
     ritz_val_(i) = theta;
 
     // update beta
-    beta_ = std::max(beta_, theta * theta / 4);
+    if (accelerated_) beta_ = std::max(beta_, theta * theta / 4);
 
     Vector diff = A_ * x - theta * x;
     double error = diff.lpNorm<1>();
@@ -179,7 +291,9 @@ public:
       if (converged(tol, i)) {
         num_converge += 1;
       }
-      if (i>0 && i<ritz_vectors_.cols()-1) {
+      if (!accelerated_ && i<ritz_vectors_.cols()-1) {
+        ritz_vectors_.col(i+1) = perform_op(i+1);
+      } else if (accelerated_ && i>0 && i<ritz_vectors_.cols()-1) {
         ritz_vectors_.col(i+1) = perform_op(i+1);
       }
     }
@@ -214,6 +328,15 @@ public:
     }
   }
 
+  void reset() {
+    if (accelerated_) {
+      ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1-1), ritz_vectors_.col(m_n_-1-2));
+      ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), ritz_vectors_.col(m_n_-1-1));
+    } else {
+      ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1));
+    }
+  }
+
   int compute(int maxit, double tol) {
     // Starting
     int i = 0;
@@ -222,6 +345,7 @@ public:
       m_niter_ +=1;
       nconv = num_converged(tol);
       if (nconv >= m_nev_) break;
+      else reset();
     }
 
     // sort the result
diff --git a/gtsam/sfm/ShonanAveraging.cpp b/gtsam/sfm/ShonanAveraging.cpp
index 786c91890..f287437d4 100644
--- a/gtsam/sfm/ShonanAveraging.cpp
+++ b/gtsam/sfm/ShonanAveraging.cpp
@@ -480,7 +480,7 @@ static bool PowerMinimumEigenValue(
     Eigen::Index numLanczosVectors = 20) {
 
   // a. Compute dominant eigenpair of S using power method
-  PowerFunctor lmOperator(A, 1, std::min(numLanczosVectors, A.rows()));
+  PowerFunctor lmOperator(A, S, 1, A.rows());
   lmOperator.init();
 
   const int lmConverged = lmOperator.compute(
@@ -503,7 +503,7 @@ static bool PowerMinimumEigenValue(
   }
 
   Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
-  PowerFunctor minShiftedOperator(C, 1, std::min(numLanczosVectors, C.rows()));
+  PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true);
   minShiftedOperator.init();
 
   const int minConverged = minShiftedOperator.compute(
diff --git a/gtsam/sfm/tests/testPowerMethod.cpp b/gtsam/sfm/tests/testPowerMethod.cpp
index 2e9c17345..4547c91b9 100644
--- a/gtsam/sfm/tests/testPowerMethod.cpp
+++ b/gtsam/sfm/tests/testPowerMethod.cpp
@@ -41,21 +41,37 @@ ShonanAveraging3 fromExampleName(
 static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o");
 
 /* ************************************************************************* */
-TEST(PowerMethod, initialize) {
+TEST(PowerMethod, powerIteration) {
+  // test power accelerated iteration
   gtsam::Sparse A(6, 6);
   A.coeffRef(0, 0) = 6;
-  PowerFunctor pf(A, 1, A.rows());
-  pf.init();
-  pf.compute(20, 1e-4);
-  EXPECT_LONGS_EQUAL(6, pf.eigenvectors().cols());
-  EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
+  Matrix S = Matrix66::Zero();
+  PowerFunctor apf(A, S, 1, A.rows(), true);
+  apf.init();
+  apf.compute(20, 1e-4);
+  EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols());
+  EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows());
 
   const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished();
-  Vector6 actual = pf.eigenvectors().col(0);
-  actual(0) = abs(actual(0));
-  EXPECT(assert_equal(x1, actual));
+  Vector6 actual0 = apf.eigenvectors().col(0);
+  actual0(0) = abs(actual0(0));
+  EXPECT(assert_equal(x1, actual0));
 
   const double ev1 = 6.0;
+  EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5);
+
+  // test power iteration, beta is set to 0
+  PowerFunctor pf(A, S, 1, A.rows());
+  pf.init();
+  pf.compute(20, 1e-4);
+  // for power method, only 5 ritz vectors converge with 20 iteration
+  EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols());
+  EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
+
+  Vector6 actual1 = apf.eigenvectors().col(0);
+  actual1(0) = abs(actual1(0));
+  EXPECT(assert_equal(x1, actual1));
+
   EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5);
 }