From 11969b32f60e562e1ebb569ace8318739960bcd9 Mon Sep 17 00:00:00 2001 From: thduynguyen Date: Wed, 24 Sep 2014 22:19:47 -0400 Subject: [PATCH] fix gradient and gradientAtZero for linear constrained Jacobian, optionally scaled by the dual variables --- gtsam/linear/GaussianFactor.h | 2 +- gtsam/linear/GaussianFactorGraph.cpp | 7 +++-- gtsam/linear/GaussianFactorGraph.h | 4 +-- gtsam/linear/HessianFactor.cpp | 8 +---- gtsam/linear/HessianFactor.h | 7 +++-- gtsam/linear/JacobianFactor.cpp | 46 ++++++++++++++++++++-------- gtsam/linear/JacobianFactor.h | 8 +++-- 7 files changed, 53 insertions(+), 29 deletions(-) diff --git a/gtsam/linear/GaussianFactor.h b/gtsam/linear/GaussianFactor.h index 7f7562af0..366b494e5 100644 --- a/gtsam/linear/GaussianFactor.h +++ b/gtsam/linear/GaussianFactor.h @@ -128,7 +128,7 @@ namespace gtsam { virtual void multiplyHessianAdd(double alpha, const double* x, double* y) const = 0; /// A'*b for Jacobian, eta for Hessian - virtual VectorValues gradientAtZero(const boost::optional dual = boost::none) const = 0; + virtual VectorValues gradientAtZero(const boost::optional negDuals = boost::none) const = 0; /// A'*b for Jacobian, eta for Hessian (raw memory version) virtual void gradientAtZero(double* d) const = 0; diff --git a/gtsam/linear/GaussianFactorGraph.cpp b/gtsam/linear/GaussianFactorGraph.cpp index 9510ba5a0..91603f041 100644 --- a/gtsam/linear/GaussianFactorGraph.cpp +++ b/gtsam/linear/GaussianFactorGraph.cpp @@ -254,6 +254,7 @@ namespace gtsam { map GaussianFactorGraph::hessianBlockDiagonal() const { map blocks; BOOST_FOREACH(const sharedFactor& factor, *this) { + if (!factor) continue; map BD = factor->hessianBlockDiagonal(); map::const_iterator it = BD.begin(); for(;it!=BD.end();it++) { @@ -299,11 +300,12 @@ namespace gtsam { } /* ************************************************************************* */ - VectorValues GaussianFactorGraph::gradientAtZero() const { +VectorValues GaussianFactorGraph::gradientAtZero( + const 
boost::optional negDuals) const { // Zero-out the gradient VectorValues g; BOOST_FOREACH(const sharedFactor& factor, *this) { - VectorValues gi = factor->gradientAtZero(); + VectorValues gi = factor->gradientAtZero(negDuals); g.addInPlace_(gi); } return g; @@ -426,6 +428,7 @@ namespace gtsam { const VectorValues& delta) const { GaussianFactorGraph::shared_ptr dualGraph(new GaussianFactorGraph()); BOOST_FOREACH(const Key key, constrainedVariables) { + // Each constrained key becomes a factor in the dual graph dualGraph->push_back(createDualFactor(key, variableIndex, delta)); } return dualGraph; diff --git a/gtsam/linear/GaussianFactorGraph.h b/gtsam/linear/GaussianFactorGraph.h index 09c725cf9..d6614b4fa 100644 --- a/gtsam/linear/GaussianFactorGraph.h +++ b/gtsam/linear/GaussianFactorGraph.h @@ -262,11 +262,11 @@ namespace gtsam { /** * Compute the gradient of the energy function, \f$ \nabla_{x=0} \left\Vert \Sigma^{-1} A x - b * \right\Vert^2 \f$, centered around zero. The gradient is \f$ A^T(Ax-b) \f$. - * @param fg The Jacobian factor graph $(A,b)$ + * @param duals[optional] Values of dual variables to scale constrained gradients, \lambda*\nabla c(x) * @param [output] g A VectorValues to store the gradient, which must be preallocated, * see allocateVectorValues * @return The gradient as a VectorValues */ - virtual VectorValues gradientAtZero() const; + virtual VectorValues gradientAtZero(const boost::optional duals = boost::none) const; /** Optimize along the gradient direction, with a closed-form computation to perform the line * search. The gradient is computed about \f$ \delta x=0 \f$. 
diff --git a/gtsam/linear/HessianFactor.cpp b/gtsam/linear/HessianFactor.cpp index 8cc04132f..66b77b9e4 100644 --- a/gtsam/linear/HessianFactor.cpp +++ b/gtsam/linear/HessianFactor.cpp @@ -591,17 +591,11 @@ void HessianFactor::multiplyHessianAdd(double alpha, const double* x, /* ************************************************************************* */ -VectorValues HessianFactor::gradientAtZero(const boost::optional dual) const { +VectorValues HessianFactor::gradientAtZero(const boost::optional negDuals) const { VectorValues g; size_t n = size(); for (size_t j = 0; j < n; ++j) g.insert(keys_[j], -info_(j,n).knownOffDiagonal()); - if (dual) { - if (dual->size() != 1) { - throw std::runtime_error("Fail to scale the gradient with the dual vector: Size mismatch!"); - } - g = (*dual)[0] * g; - } return g; } diff --git a/gtsam/linear/HessianFactor.h b/gtsam/linear/HessianFactor.h index 1f32200d0..805be5455 100644 --- a/gtsam/linear/HessianFactor.h +++ b/gtsam/linear/HessianFactor.h @@ -384,8 +384,11 @@ namespace gtsam { void multiplyHessianAdd(double alpha, const double* x, double* y) const {}; - /// eta for Hessian - VectorValues gradientAtZero(const boost::optional dual = boost::none) const; + /** + * eta for Hessian + * Ignores the negDuals parameter. 
It's only valid for constraints, which need to be JacobianFactor + */ + VectorValues gradientAtZero(const boost::optional negDuals = boost::none) const; virtual void gradientAtZero(double* d) const; diff --git a/gtsam/linear/JacobianFactor.cpp b/gtsam/linear/JacobianFactor.cpp index 59943fef2..7881883b7 100644 --- a/gtsam/linear/JacobianFactor.cpp +++ b/gtsam/linear/JacobianFactor.cpp @@ -152,8 +152,8 @@ JacobianFactor::JacobianFactor(const HessianFactor& factor) : bool success; boost::tie(maxrank, success) = choleskyCareful(Ab_.matrix()); - factor.print("HessianFactor to convert: "); - cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl; +// factor.print("HessianFactor to convert: "); +// cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl; // Check for indefinite system if (!success) { @@ -606,20 +606,36 @@ void JacobianFactor::multiplyHessianAdd(double alpha, const double* x, /* ************************************************************************* */ VectorValues JacobianFactor::gradientAtZero( - const boost::optional dual) const { + const boost::optional negDuals) const { VectorValues g; - Vector b = getb(); - // Gradient is really -A'*b / sigma^2 - // transposeMultiplyAdd will divide by sigma once, so we need one more - Vector b_sigma = model_ ? model_->whiten(b) : b; - // scale b by the dual vector if it exists - if (dual) { - if (dual->size() != b_sigma.size()) + + /* We treat linear constraints differently: They are not least square terms, 0.5*||Ax-b||^2, + * but simply linear constraints: Ax-b=0. + * The constraint function is c(x) = Ax-b. Its Jacobian is A, + * and the gradient is the sum of columns of A', each optionally scaled by the + * corresponding element of negDual vector. 
+ */ + if (isConstrained()) { + Vector scale = ones(rows()); + if (negDuals && dualKey_) { + scale = negDuals->at(dualKey()); + if (scale.size() != rows()) + throw std::runtime_error( + "Fail to scale the gradient with the dual vector: Size mismatch!"); + } + if (negDuals && !dualKey_) { throw std::runtime_error( - "Fail to scale the gradient with the dual vector: Size mismatch!"); - b_sigma = b_sigma.cwiseProduct(*dual); + "Fail to scale the gradient with the dual vector: No dual key!"); + } + this->transposeMultiplyAdd(1.0, scale, g); // g = A'*scale + } + else { + Vector b = getb(); + // Gradient is really -A'*b / sigma^2 + // transposeMultiplyAdd will divide by sigma once, so we need one more + Vector b_sigma = model_ ? model_->whiten(b) : b; + this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2 } - this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2 return g; } @@ -630,6 +646,10 @@ void JacobianFactor::gradientAtZero(double* d) const { /* ************************************************************************* */ Vector JacobianFactor::gradient(Key key, const VectorValues& x) const { + if (isConstrained()) { // Untested. 
But see the explanation in gradientAtZero() + Matrix A = getA(find(key)); + return A.transpose()*ones(rows()); + } // TODO: optimize it for JacobianFactor without converting to a HessianFactor HessianFactor hessian(*this); return hessian.gradient(key, x); diff --git a/gtsam/linear/JacobianFactor.h b/gtsam/linear/JacobianFactor.h index 2c887209b..4ed4271c4 100644 --- a/gtsam/linear/JacobianFactor.h +++ b/gtsam/linear/JacobianFactor.h @@ -342,9 +342,13 @@ public: } ; - /// A'*b for Jacobian + /** + * A'*b for Jacobian, + * with the option to scale with the corresponding negative dual variable + * for constrained factor, -\lambda*\nabla c + */ VectorValues gradientAtZero( - const boost::optional dual = boost::none) const; + const boost::optional negDuals = boost::none) const; /* ************************************************************************* */ virtual void gradientAtZero(double* d) const;