Revise according to Frank and David's comments

release/4.3a0
jingwuOUO 2020-10-22 14:19:05 -04:00
parent fbebd3ed69
commit 5f50e740b7
3 changed files with 94 additions and 92 deletions


@@ -34,7 +34,7 @@ using Sparse = Eigen::SparseMatrix<double>;
 template <class Operator>
 class AcceleratedPowerMethod : public PowerMethod<Operator> {
   /**
-   * \brief Compute maximum Eigenpair with power method
+   * \brief Compute maximum Eigenpair with accelerated power method
    *
    * References :
    * 1) Rosen, D. and Carlone, L., 2017, September. Computational
@@ -46,8 +46,9 @@ class AcceleratedPowerMethod : public PowerMethod<Operator> {
    * stochastic power iteration, in Proc. Mach. Learn. Res., no. 84, 2018, pp.
    * 5867
    *
-   * It performs the following iteration: \f$ x_{k+1} = A * x_k + \beta *
-   * x_{k-1} \f$ where A is the certificate matrix, x is the Ritz vector
+   * It performs the following iteration: \f$ x_{k+1} = A * x_k - \beta *
+   * x_{k-1} \f$ where A is the matrix whose dominant eigenpair we seek and x is
+   * the Ritz vector
    *
    */
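The momentum recurrence in this comment is easy to try in isolation. Below is a minimal standalone sketch in plain Eigen, not the GTSAM API; the test matrix and the fixed beta are made up for illustration:

#include <Eigen/Dense>
#include <iostream>

int main() {
  // Symmetric test matrix; dominant eigenvalue is (7 + sqrt(5))/2 ~= 4.618
  Eigen::MatrixXd A(2, 2);
  A << 4.0, 1.0,
       1.0, 3.0;

  Eigen::VectorXd x0 = Eigen::VectorXd::Random(2).normalized();
  Eigen::VectorXd x1 = (A * x0).normalized();
  const double beta = 0.5;  // fixed momentum for this sketch; the class tunes it

  // x_{k+1} = A * x_k - beta * x_{k-1}, normalized after every step
  for (int k = 0; k < 50; ++k) {
    Eigen::VectorXd x2 = (A * x1 - beta * x0).normalized();
    x0 = x1;
    x1 = x2;
  }
  std::cout << "Rayleigh quotient: " << x1.dot(A * x1) << std::endl;
  return 0;
}

For a symmetric matrix the printed Rayleigh quotient approaches the dominant eigenvalue; with a well-tuned beta the momentum term suppresses the second eigendirection faster than plain power iteration.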
@@ -59,46 +60,52 @@ class AcceleratedPowerMethod : public PowerMethod<Operator> {
   // Constructor from the matrix A (given as Matrix or Sparse), optional initial
   // vector as ritzVector
   explicit AcceleratedPowerMethod(
-      const Operator &A, const boost::optional<Vector> initial = boost::none)
+      const Operator &A, const boost::optional<Vector> initial = boost::none,
+      const double initialBeta = 0.0)
       : PowerMethod<Operator>(A, initial) {
     Vector x0;
     // initialize the Ritz vector
-    x0 = initial ? Vector::Random(this->dim_) : initial.get();
+    x0 = initial ? initial.get() : Vector::Random(this->dim_);
     Vector x00 = Vector::Random(this->dim_);
     x0.normalize();
     x00.normalize();

     // initialize the Ritz eigenvector and the previous vector
-    previousVector_ = update(x0, x00, beta_);
-    this->ritzVector_ = update(previousVector_, x0, beta_);
-    this->perturb();
-
-    // set beta
-    Vector init_resid = this->ritzVector_;
-    const double up = init_resid.transpose() * this->A_ * init_resid;
-    const double down = init_resid.transpose().dot(init_resid);
-    const double mu = up / down;
-    beta_ = mu * mu / 4;
-    setBeta();
+    previousVector_ = powerIteration(x0, x00, beta_);
+    this->ritzVector_ = powerIteration(previousVector_, x0, beta_);
+
+    // initialize beta_
+    if (!initialBeta) {
+      estimateBeta();
+    } else {
+      beta_ = initialBeta;
+    }
   }

   // Update the ritzVector with beta and the previous two Ritz vectors
-  Vector update(const Vector &x1, const Vector &x0, const double beta) const {
+  Vector powerIteration(const Vector &x1, const Vector &x0,
+                        const double beta) const {
     Vector y = this->A_ * x1 - beta * x0;
     y.normalize();
     return y;
   }

   // Update the ritzVector with beta and the previous two Ritz vectors
-  Vector update() const {
-    Vector y = update(this->ritzVector_, previousVector_, beta_);
+  Vector powerIteration() const {
+    Vector y = powerIteration(this->ritzVector_, previousVector_, beta_);
     previousVector_ = this->ritzVector_;
     return y;
   }

   // Tune the momentum beta using the Best Heavy Ball algorithm in Ref(3)
-  void setBeta() {
-    double maxBeta = beta_;
+  void estimateBeta() {
+    // set beta from the Rayleigh quotient mu of the current Ritz vector
+    Vector init_resid = this->ritzVector_;
+    const double up = init_resid.transpose() * this->A_ * init_resid;
+    const double down = init_resid.transpose().dot(init_resid);
+    const double mu = up / down;
+    double maxBeta = mu * mu / 4;
     size_t maxIndex;
     std::vector<double> betas = {2.0 / 3.0 * maxBeta, 0.99 * maxBeta, maxBeta,
                                  1.01 * maxBeta, 1.5 * maxBeta};
@@ -110,10 +117,10 @@ class AcceleratedPowerMethod : public PowerMethod<Operator> {
       if (j < 2) {
         Vector x0 = this->ritzVector_;
         Vector x00 = previousVector_;
-        R.col(0) = update(x0, x00, betas[k]);
-        R.col(1) = update(R.col(0), x0, betas[k]);
+        R.col(0) = powerIteration(x0, x00, betas[k]);
+        R.col(1) = powerIteration(R.col(0), x0, betas[k]);
       } else {
-        R.col(j) = update(R.col(j - 1), R.col(j - 2), betas[k]);
+        R.col(j) = powerIteration(R.col(j - 1), R.col(j - 2), betas[k]);
       }
     }
     const Vector x = R.col(9);
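The selection step is cut off above, so the following standalone sketch fills it in with one plausible criterion: run ten momentum iterations per candidate beta and keep the beta whose final iterate has the largest Rayleigh quotient. The matrix, names, and criterion here are illustrative assumptions, not the GTSAM implementation:

#include <Eigen/Dense>
#include <iostream>
#include <vector>

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 4.0, 1.0,
       1.0, 3.0;

  Eigen::VectorXd x0 = Eigen::VectorXd::Random(2).normalized();
  Eigen::VectorXd x1 = (A * x0).normalized();

  // Rayleigh-quotient estimate mu of the dominant eigenvalue; maxBeta = mu^2/4
  const double mu = x1.dot(A * x1);
  const double maxBeta = mu * mu / 4.0;
  const std::vector<double> betas = {2.0 / 3.0 * maxBeta, 0.99 * maxBeta,
                                     maxBeta, 1.01 * maxBeta, 1.5 * maxBeta};

  double bestBeta = 0.0, bestQuotient = -1e300;
  for (double beta : betas) {
    Eigen::VectorXd a = x0, b = x1;
    for (int j = 0; j < 10; ++j) {  // ten trial iterations per candidate beta
      Eigen::VectorXd c = (A * b - beta * a).normalized();
      a = b;
      b = c;
    }
    const double quotient = b.dot(A * b);  // score the tenth iterate
    if (quotient > bestQuotient) {
      bestQuotient = quotient;
      bestBeta = beta;
    }
  }
  std::cout << "selected beta: " << bestBeta << std::endl;
  return 0;
}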


@@ -46,8 +46,8 @@ class PowerMethod {
   size_t nrIterations_;  // number of iterations

-  double ritzValue_;   // all Ritz eigenvalues
-  Vector ritzVector_;  // all Ritz eigenvectors
+  double ritzValue_;   // Ritz eigenvalue
+  Vector ritzVector_;  // Ritz eigenvector

  public:
   // Constructor
@@ -55,85 +55,61 @@ class PowerMethod {
               const boost::optional<Vector> initial = boost::none)
       : A_(A), dim_(A.rows()), nrIterations_(0) {
     Vector x0;
-    x0 = initial ? Vector::Random(dim_) : initial.get();
+    x0 = initial ? initial.get() : Vector::Random(dim_);
     x0.normalize();

-    // initialize Ritz eigen values
+    // initialize the Ritz eigenvalue
     ritzValue_ = 0.0;

-    // initialize Ritz eigen vectors
+    // initialize the Ritz eigenvector
     ritzVector_ = Vector::Zero(dim_);
-    ritzVector_.col(0) = update(x0);
-    perturb();
+    ritzVector_ = powerIteration(x0);
   }

   // Update the vector by multiplying with A_ and normalizing
-  Vector update(const Vector &x) const {
+  Vector powerIteration(const Vector &x) const {
     Vector y = A_ * x;
     y.normalize();
     return y;
   }

   // Update the stored Ritz vector by multiplying with A_ and normalizing
-  Vector update() const { return update(ritzVector_); }
-
-  // Perturb the initial ritzvector
-  void perturb() {
-    // generate a perturbation of magnitude 0.03*||x_0||_2 as stated in David's paper
-    std::mt19937 rng(42);
-    std::uniform_real_distribution<double> uniform01(0.0, 1.0);
-    int n = dim_;
-    // Vector disturb(n);
-    // for (int i = 0; i < n; ++i) {
-    //   disturb(i) = uniform01(rng);
-    // }
-    Vector disturb = Vector::Random(n);
-    disturb.normalize();
-    Vector x0 = ritzVector_;
-    double magnitude = x0.norm();
-    ritzVector_ = x0 + 0.03 * magnitude * disturb;
-  }
+  Vector powerIteration() const { return powerIteration(ritzVector_); }

-  // Perform power iteration on a single Ritz value
-  // Updates ritzValue_
-  bool iterateOne(double tol) {
+  // After one power iteration, store the Ritz value and return true if the
+  // Ritz error is below tol, false otherwise
+  bool converged(double tol) {
     const Vector x = ritzVector_;
-    double theta = x.transpose() * A_ * x;
-
     // store the Ritz eigen value
-    ritzValue_ = theta;
-
-    const Vector diff = A_ * x - theta * x;
-    double error = diff.norm();
+    ritzValue_ = x.dot(A_ * x);
+    double error = (A_ * x - ritzValue_ * x).norm();
     return error < tol;
   }

   // Return the number of iterations
   size_t nrIterations() const { return nrIterations_; }

-  // Start the iteration until the ritz error converge
-  int compute(int maxIterations, double tol) {
+  // Run the power/accelerated iteration: update the Ritz vector, compute the
+  // Ritz error, and repeat until the error converges or maxIterations is hit
+  bool compute(size_t maxIterations, double tol) {
     // Starting
-    int nrConverged = 0;
-    for (int i = 0; i < maxIterations; i++) {
-      nrIterations_ += 1;
-      ritzVector_ = update();
-      nrConverged = iterateOne(tol);
-      if (nrConverged) break;
+    bool isConverged = false;
+    for (size_t i = 0; i < maxIterations; i++) {
+      ++nrIterations_;
+      ritzVector_ = powerIteration();
+      isConverged = converged(tol);
+      if (isConverged) return isConverged;
     }
-    return std::min(1, nrConverged);
+    return isConverged;
   }

   // Return the eigenvalue
-  double eigenvalues() const { return ritzValue_; }
+  double eigenvalue() const { return ritzValue_; }

   // Return the eigenvector
-  const Vector eigenvectors() const { return ritzVector_; }
+  Vector eigenvector() const { return ritzVector_; }
 };

 }  // namespace gtsam
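The whole class reduces to a short loop. For reference, a minimal standalone sketch of the same plain power iteration in Eigen, illustrative rather than the class above:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 4.0, 1.0,
       1.0, 3.0;

  Eigen::VectorXd x = Eigen::VectorXd::Random(2).normalized();
  double ritzValue = 0.0;
  const double tol = 1e-8;

  for (int i = 0; i < 1000; ++i) {
    x = (A * x).normalized();                         // powerIteration(x)
    ritzValue = x.dot(A * x);                         // Ritz value
    if ((A * x - ritzValue * x).norm() < tol) break;  // converged(tol)
  }
  std::cout << "dominant eigenvalue ~= " << ritzValue << std::endl;
  return 0;
}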


@@ -471,14 +471,40 @@ Sparse ShonanAveraging<d>::computeA(const Values &values) const {
   return Lambda - Q_;
 }

+/* ************************************************************************* */
+template <size_t d>
+Sparse ShonanAveraging<d>::computeA(const Matrix &S) const {
+  auto Lambda = computeLambda(S);
+  return Lambda - Q_;
+}
+
+/* ************************************************************************* */
+// Perturb initialVector by adding a spherically-uniformly distributed random
+// vector of magnitude 0.03*||initialVector||_2 to it
+Vector perturb(const Vector &initialVector) {
+  // generate a perturbation of magnitude 0.03*||x_0||_2 as stated in David's paper
+  int n = initialVector.rows();
+  Vector disturb = Vector::Random(n);
+  disturb.normalize();
+  double magnitude = initialVector.norm();
+  Vector perturbedVector = initialVector + 0.03 * magnitude * disturb;
+  perturbedVector.normalize();
+  return perturbedVector;
+}
+
 /* ************************************************************************* */
 /// MINIMUM EIGENVALUE COMPUTATIONS
 // Alg.6 from the paper Distributed Certifiably Correct Pose-Graph Optimization;
 // it takes the certificate matrix A as input, and maxIterations and
 // minEigenvalueNonnegativityTolerance default to 1000 and 10e-4. There are
 // two parts in this algorithm:
-// (1)
+// (1) compute the maximum eigenpair (\lambda_dom, \vect{v}_dom) of A by the
+// power method; if \lambda_dom is less than zero, return that eigenpair.
+// (2) compute the maximum eigenpair (\theta, \vect{v}) of C = \lambda_dom * I - A
+// by the accelerated power method, then return (\lambda_dom - \theta, \vect{v}).
 static bool PowerMinimumEigenValue(
     const Sparse &A, const Matrix &S, double *minEigenValue,
     Vector *minEigenVector = 0, size_t *numIterations = 0,
@@ -486,39 +512,39 @@ static bool PowerMinimumEigenValue(
     double minEigenvalueNonnegativityTolerance = 10e-4) {

   // a. Compute dominant eigenpair of S using power method
-  const boost::optional<Vector> initial(S.row(0));
-  PowerMethod<Sparse> lmOperator(A, initial);
+  PowerMethod<Sparse> lmOperator(A);

-  const int lmConverged = lmOperator.compute(
+  const bool lmConverged = lmOperator.compute(
       maxIterations, 1e-5);

   // Check convergence and bail out if necessary
-  if (lmConverged != 1) return false;
+  if (!lmConverged) return false;

-  const double lmEigenValue = lmOperator.eigenvalues();
+  const double lmEigenValue = lmOperator.eigenvalue();

   if (lmEigenValue < 0) {
     // The largest-magnitude eigenvalue is negative, and therefore also the
     // minimum eigenvalue, so just return this solution
     *minEigenValue = lmEigenValue;
     if (minEigenVector) {
-      *minEigenVector = lmOperator.eigenvectors();
+      *minEigenVector = lmOperator.eigenvector();
       minEigenVector->normalize();  // Ensure that this is a unit vector
     }
     return true;
   }

   const Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
+  const boost::optional<Vector> initial = perturb(S.row(0));
   AcceleratedPowerMethod<Sparse> minShiftedOperator(C, initial);

-  const int minConverged = minShiftedOperator.compute(
+  const bool minConverged = minShiftedOperator.compute(
       maxIterations, minEigenvalueNonnegativityTolerance / lmEigenValue);

-  if (minConverged != 1) return false;
+  if (!minConverged) return false;

-  *minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues();
+  *minEigenValue = lmEigenValue - minShiftedOperator.eigenvalue();
   if (minEigenVector) {
-    *minEigenVector = minShiftedOperator.eigenvectors();
+    *minEigenVector = minShiftedOperator.eigenvector();
     minEigenVector->normalize();  // Ensure that this is a unit vector
   }
   if (numIterations) *numIterations = minShiftedOperator.nrIterations();
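The shift in step (2) rests on the identity lambda_min(A) = lambda_dom - lambda_max(lambda_dom * I - A). A small self-contained check of that identity, using Eigen's dense solver for both sides; the 3x3 matrix is made up, and its eigenvalues are all positive so the dominant eigenvalue is simply the largest:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(3, 3);
  A << 4.0, 1.0, 0.0,
       1.0, 3.0, 1.0,
       0.0, 1.0, 2.0;

  // Dominant eigenvalue of A (largest, since all eigenvalues are positive)
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(A);
  const double lm = es.eigenvalues().maxCoeff();

  // Dominant eigenvalue theta of the shifted matrix C = lm * I - A
  const Eigen::MatrixXd C = lm * Eigen::MatrixXd::Identity(3, 3) - A;
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> esC(C);
  const double theta = esC.eigenvalues().maxCoeff();

  // lm - theta recovers the minimum eigenvalue of A exactly
  std::cout << "min eig of A:  " << es.eigenvalues().minCoeff() << "\n"
            << "lm - theta:    " << lm - theta << std::endl;
  return 0;
}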
@@ -669,13 +695,6 @@ static bool SparseMinimumEigenValue(
   return true;
 }

-/* ************************************************************************* */
-template <size_t d>
-Sparse ShonanAveraging<d>::computeA(const Matrix &S) const {
-  auto Lambda = computeLambda(S);
-  return Lambda - Q_;
-}
-
 /* ************************************************************************* */
 template <size_t d>
 double ShonanAveraging<d>::computeMinEigenValue(const Values &values,