diff --git a/gtsam/base/Testable.h b/gtsam/base/Testable.h
index a308c50a1..d24db84dd 100644
--- a/gtsam/base/Testable.h
+++ b/gtsam/base/Testable.h
@@ -86,7 +86,7 @@ namespace gtsam {
    * This template works for any type with equals
    */
   template<class V>
-  bool assert_equal(const V& expected, const V& actual, double tol = 1e-9) {
+  bool assert_equal(const V& expected, const V& actual, double tol = 1e-8) {
     if (actual.equals(expected, tol))
       return true;
     printf("Not equal:\n");
diff --git a/gtsam/linear/GaussianFactor.h b/gtsam/linear/GaussianFactor.h
index 8c8e2cb2b..8d3b83332 100644
--- a/gtsam/linear/GaussianFactor.h
+++ b/gtsam/linear/GaussianFactor.h
@@ -128,7 +128,7 @@ namespace gtsam {
     virtual void multiplyHessianAdd(double alpha, const double* x, double* y) const = 0;
 
     /// A'*b for Jacobian, eta for Hessian
-    virtual VectorValues gradientAtZero() const = 0;
+    virtual VectorValues gradientAtZero(const boost::optional<Vector> dual = boost::none) const = 0;
 
     /// A'*b for Jacobian, eta for Hessian (raw memory version)
     virtual void gradientAtZero(double* d) const = 0;
diff --git a/gtsam/linear/GaussianFactorGraph.cpp b/gtsam/linear/GaussianFactorGraph.cpp
index 488a26b09..eb5bd7b8b 100644
--- a/gtsam/linear/GaussianFactorGraph.cpp
+++ b/gtsam/linear/GaussianFactorGraph.cpp
@@ -392,19 +392,26 @@ namespace gtsam {
   }
 
   /* ************************************************************************* */
-  std::pair<GaussianFactorGraph, GaussianFactorGraph> GaussianFactorGraph::splitConstraints() const {
+  boost::tuple<GaussianFactorGraph, GaussianFactorGraph, GaussianFactorGraph> GaussianFactorGraph::splitConstraints() const {
+    typedef HessianFactor H;
     typedef JacobianFactor J;
-    GaussianFactorGraph unconstraints, constraints;
+
+    GaussianFactorGraph hessians, jacobians, constraints;
     BOOST_FOREACH(const GaussianFactor::shared_ptr& factor, *this) {
-      J::shared_ptr jacobian(boost::dynamic_pointer_cast<J>(factor));
-      if (jacobian && jacobian->get_model() && jacobian->get_model()->isConstrained()) {
-        constraints.push_back(jacobian);
-      }
+      H::shared_ptr hessian(boost::dynamic_pointer_cast<H>(factor));
+      if (hessian)
+        hessians.push_back(factor);
       else {
-        unconstraints.push_back(factor);
+        J::shared_ptr jacobian(boost::dynamic_pointer_cast<J>(factor));
+        if (jacobian && jacobian->get_model() && jacobian->get_model()->isConstrained()) {
+          constraints.push_back(jacobian);
+        }
+        else {
+          jacobians.push_back(factor);
+        }
       }
     }
-    return make_pair(unconstraints, constraints);
+    return boost::make_tuple(hessians, jacobians, constraints);
   }
 
   /* ************************************************************************* */
diff --git a/gtsam/linear/GaussianFactorGraph.h b/gtsam/linear/GaussianFactorGraph.h
index ede8c1fe9..80e8d36fd 100644
--- a/gtsam/linear/GaussianFactorGraph.h
+++ b/gtsam/linear/GaussianFactorGraph.h
@@ -326,7 +326,8 @@ namespace gtsam {
-    * Split constraints and unconstrained factors into two different graphs
-    * @return a pair of graphs
+    * Split Hessian factors, unconstrained Jacobian factors, and constrained
+    * Jacobian factors into three different graphs
+    * @return a tuple of three graphs
     */
-    std::pair<GaussianFactorGraph, GaussianFactorGraph> splitConstraints() const;
+    boost::tuple<GaussianFactorGraph, GaussianFactorGraph, GaussianFactorGraph> splitConstraints() const;
 
   private:
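Usage sketch (illustrative, not part of the patch): how a caller unpacks the
new three-way split. Only the boost::tuple return type introduced above is
assumed; the graph contents are hypothetical.

    #include <gtsam/linear/GaussianFactorGraph.h>
    #include <boost/tuple/tuple.hpp>

    using namespace gtsam;

    void exampleSplit(const GaussianFactorGraph& graph) {
      GaussianFactorGraph hessians, jacobians, constraints;
      boost::tie(hessians, jacobians, constraints) = graph.splitConstraints();
      // hessians:    all HessianFactors
      // jacobians:   JacobianFactors with an unconstrained (or no) noise model
      // constraints: JacobianFactors whose noise model isConstrained()
    }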
diff --git a/gtsam/linear/HessianFactor.cpp b/gtsam/linear/HessianFactor.cpp
index c70f73c7b..6e091f703 100644
--- a/gtsam/linear/HessianFactor.cpp
+++ b/gtsam/linear/HessianFactor.cpp
@@ -591,11 +591,17 @@ void HessianFactor::multiplyHessianAdd(double alpha, const double* x,
 
 /* ************************************************************************* */
-VectorValues HessianFactor::gradientAtZero() const {
+VectorValues HessianFactor::gradientAtZero(const boost::optional<Vector> dual) const {
   VectorValues g;
   size_t n = size();
   for (size_t j = 0; j < n; ++j)
     g.insert(keys_[j], -info_(j,n).knownOffDiagonal());
+  if (dual) {
+    if (dual->size() != 1) {
+      throw std::runtime_error("Failed to scale the gradient with the dual vector: size mismatch!");
+    }
+    g = (*dual)[0] * g;
+  }
   return g;
 }
 
 /* ************************************************************************* */
@@ -675,23 +681,74 @@ EliminatePreferCholesky(const GaussianFactorGraph& factors, const Ordering& keys)
  * and (2) large strange value of lambdas might cause the joint factor non-positive
  * definite [is this true?]. But at least, this will help in typical cases.
  */
-  GaussianFactorGraph unconstraints, constraints;
-  boost::tie(unconstraints, constraints) = factors.splitConstraints();
+  GaussianFactorGraph hessians, jacobians, constraints;
+  boost::tie(hessians, jacobians, constraints) = factors.splitConstraints();
+
+  bool hasHessians = hessians.size() > 0;
+
+  // Add all Jacobians to gather as much information as we can
+  hessians.push_back(jacobians);
+
   if (constraints.size()>0) {
-    // Build joint factor
-    HessianFactor::shared_ptr jointFactor;
-    try {
-      jointFactor = boost::make_shared<HessianFactor>(unconstraints, Scatter(factors, keys));
-    } catch(std::invalid_argument&) {
-      throw InvalidDenseElimination(
-          "EliminateCholesky was called with a request to eliminate variables that are not\n"
-          "involved in the provided factors.");
+    // If there are Hessian factors, turn them into a conditional by partial
+    // elimination, then use the resulting Jacobians when eliminating the constraints
+    GaussianFactor::shared_ptr unconstrainedNewFactor;
+    if (hessians.size() > 0) {
+      // Does the unconstrained part involve any separator keys, i.e. keys
+      // that are not being eliminated now?
+      bool hasSeparator = false;
+      GaussianFactorGraph::Keys unconstrainedKeys = hessians.keys();
+      BOOST_FOREACH(Key key, unconstrainedKeys) {
+        if (find(keys.begin(), keys.end(), key) == keys.end()) {
+          hasSeparator = true;
+          break;
+        }
+      }
+
+      if (hasSeparator) {
+        // Find frontal keys in the unconstrained factor to eliminate
+        Ordering subkeys;
+        BOOST_FOREACH(Key key, keys) {
+          if (unconstrainedKeys.exists(key))
+            subkeys.push_back(key);
+        }
+        GaussianConditional::shared_ptr unconstrainedConditional;
+        boost::tie(unconstrainedConditional, unconstrainedNewFactor)
+            = EliminateCholesky(hessians, subkeys);
+        constraints.push_back(unconstrainedConditional);
+      }
+      else {
+        if (hasHessians) {
+          HessianFactor::shared_ptr jointFactor = boost::make_shared<
+              HessianFactor>(hessians, Scatter(factors, keys));
+          constraints.push_back(jointFactor);
+        }
+        else {
+          constraints.push_back(hessians);
+        }
+      }
+    }
+
+    // Now eliminate the constraints
+    GaussianConditional::shared_ptr constrainedConditional;
+    GaussianFactor::shared_ptr constrainedNewFactor;
+    boost::tie(constrainedConditional, constrainedNewFactor) = EliminateQR(
+        constraints, keys);
+
+    if (unconstrainedNewFactor) {
+      // Combine the remaining unconstrained factor with the new factor from QR
+      GaussianFactorGraph newFactors;
+      newFactors.push_back(unconstrainedNewFactor);
+      newFactors.push_back(constrainedNewFactor);
+      HessianFactor::shared_ptr newFactor(new HessianFactor(newFactors));
+      return make_pair(constrainedConditional, newFactor);
+    } else {
+      return make_pair(constrainedConditional, constrainedNewFactor);
     }
-    constraints.push_back(jointFactor);
-    return EliminateQR(constraints, keys);
   }
-  else
+  else {
     return EliminateCholesky(factors, keys);
+  }
 }
 
 } // gtsam
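For the Hessian overload above, the dual must be a single element that scales
the entire gradient (see the size check). A minimal sketch with made-up
numbers; for f(x) = 0.5 x'Gx - x'g0 + 0.5 c, the gradient at x = 0 is -g0.

    #include <gtsam/linear/HessianFactor.h>
    #include <gtsam/linear/VectorValues.h>

    using namespace gtsam;

    int main() {
      // Quadratic on key 0: f(x) = 0.5 x'Gx - x'g0 + 0.5 c (values made up)
      Matrix G = (Matrix(2, 2) << 4.0, 0.0, 0.0, 4.0);
      Vector g0 = (Vector(2) << 1.0, -2.0);
      HessianFactor factor(0, G, g0, 3.0);

      VectorValues grad = factor.gradientAtZero();           // -g0 = (-1, 2)
      Vector dual = (Vector(1) << 5.0);
      VectorValues gradScaled = factor.gradientAtZero(dual); // 5 * -g0 = (-5, 10)

      grad.print("grad: ");
      gradScaled.print("gradScaled: ");
      return 0;
    }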
diff --git a/gtsam/linear/HessianFactor.h b/gtsam/linear/HessianFactor.h
index 62d84925c..6bddfb365 100644
--- a/gtsam/linear/HessianFactor.h
+++ b/gtsam/linear/HessianFactor.h
@@ -385,7 +385,7 @@ namespace gtsam {
     void multiplyHessianAdd(double alpha, const double* x, double* y) const {};
 
     /// eta for Hessian
-    VectorValues gradientAtZero() const;
+    VectorValues gradientAtZero(const boost::optional<Vector> dual = boost::none) const;
 
     virtual void gradientAtZero(double* d) const;
diff --git a/gtsam/linear/JacobianFactor.cpp b/gtsam/linear/JacobianFactor.cpp
index dd9664935..ecaf05155 100644
--- a/gtsam/linear/JacobianFactor.cpp
+++ b/gtsam/linear/JacobianFactor.cpp
@@ -144,8 +144,9 @@ JacobianFactor::JacobianFactor(const HessianFactor& factor) :
   boost::tie(maxrank, success) = choleskyCareful(Ab_.matrix());
 
   // Check for indefinite system
-  if (!success)
+  if (!success) {
     throw IndeterminantLinearSystemException(factor.keys().front());
+  }
 
   // Zero out lower triangle
   Ab_.matrix().topRows(maxrank).triangularView<Eigen::StrictlyLower>() =
@@ -591,12 +592,18 @@ void JacobianFactor::multiplyHessianAdd(double alpha, const double* x,
 }
 
 /* ************************************************************************* */
-VectorValues JacobianFactor::gradientAtZero() const {
+VectorValues JacobianFactor::gradientAtZero(const boost::optional<Vector> dual) const {
   VectorValues g;
   Vector b = getb();
   // Gradient is really -A'*b / sigma^2
   // transposeMultiplyAdd will divide by sigma once, so we need one more
   Vector b_sigma = model_ ? model_->whiten(b) : b;
+  // Scale b by the dual vector, if one is given
+  if (dual) {
+    if (dual->size() != b_sigma.size())
+      throw std::runtime_error("Failed to scale the gradient with the dual vector: size mismatch!");
+    b_sigma = b_sigma.cwiseProduct(*dual);
+  }
   this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2
   return g;
 }
@@ -748,12 +755,11 @@ GaussianConditional::shared_ptr JacobianFactor::splitConditional(
   keys_.erase(begin(), begin() + nrFrontals);
   // Set sigmas with the right model
   if (model_) {
-    if (model_->isConstrained())
-      model_ = noiseModel::Constrained::MixedSigmas(
-          model_->sigmas().tail(remainingRows));
+    Vector sigmas = model_->sigmas().tail(remainingRows);
+    if (sigmas.size() > 0 && sigmas.minCoeff() == 0.0)
+      model_ = noiseModel::Constrained::MixedSigmas(sigmas);
     else
-      model_ = noiseModel::Diagonal::Sigmas(
-          model_->sigmas().tail(remainingRows));
+      model_ = noiseModel::Diagonal::Sigmas(sigmas, false); // smart=false: keep sigmas exactly as given
     assert(model_->dim() == (size_t)Ab_.rows());
   }
   gttoc(remaining_factor);
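For the Jacobian overload above, the dual has one entry per factor row: the
(whitened) right-hand side is reweighted entry-wise before forming -A'*b. A
sketch with a unit noise model and made-up numbers; only the new overload is
assumed.

    #include <gtsam/linear/JacobianFactor.h>
    #include <gtsam/linear/VectorValues.h>

    using namespace gtsam;

    int main() {
      // Two-row factor on key 1; identity A keeps the arithmetic obvious
      Matrix A = (Matrix(2, 2) << 1.0, 0.0, 0.0, 1.0);
      Vector b = (Vector(2) << 2.0, -1.0);
      JacobianFactor factor(1, A, b);

      VectorValues g = factor.gradientAtZero();  // -A'*b = (-2, 1)

      // One weight per factor row, applied via cwiseProduct as above
      Vector dual = (Vector(2) << 3.0, 5.0);
      VectorValues gScaled = factor.gradientAtZero(dual); // -A'*(b .* dual) = (-6, 5)

      g.print("g: ");
      gScaled.print("gScaled: ");
      return 0;
    }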
diff --git a/gtsam/linear/JacobianFactor.h b/gtsam/linear/JacobianFactor.h
index ce2a47ab3..a5cc90b62 100644
--- a/gtsam/linear/JacobianFactor.h
+++ b/gtsam/linear/JacobianFactor.h
@@ -300,7 +300,7 @@ namespace gtsam {
     void multiplyHessianAdd(double alpha, const double* x, double* y) const {};
 
     /// A'*b for Jacobian
-    VectorValues gradientAtZero() const;
+    VectorValues gradientAtZero(const boost::optional<Vector> dual = boost::none) const;
 
     /* ************************************************************************* */
     virtual void gradientAtZero(double* d) const;
diff --git a/gtsam/linear/tests/testHessianFactor.cpp b/gtsam/linear/tests/testHessianFactor.cpp
index 84c16bd9c..7d9ee4b4f 100644
--- a/gtsam/linear/tests/testHessianFactor.cpp
+++ b/gtsam/linear/tests/testHessianFactor.cpp
@@ -446,6 +446,12 @@ TEST(HessianFactor, gradientAtZero)
   EXPECT(assert_equal(-A.transpose()*b, expectedG.vector(keys)));
   VectorValues actualG = factor.gradientAtZero();
   EXPECT(assert_equal(expectedG, actualG));
+
+  // Test the gradient at zero, scaled by a dual variable
+  Vector dual = (Vector(1) << 5.0);
+  VectorValues actualGscaled = factor.gradientAtZero(dual);
+  VectorValues expectedGscaled = pair_list_of(0, -g1*dual[0]) (1, -g2*dual[0]);
+  EXPECT(assert_equal(expectedGscaled, actualGscaled));
 }
 
 /* ************************************************************************* */
diff --git a/gtsam/linear/tests/testJacobianFactor.cpp b/gtsam/linear/tests/testJacobianFactor.cpp
index 1fc7365e7..d4faf7a63 100644
--- a/gtsam/linear/tests/testJacobianFactor.cpp
+++ b/gtsam/linear/tests/testJacobianFactor.cpp
@@ -326,6 +326,14 @@ TEST(JacobianFactor, operators )
   EXPECT(assert_equal(-A.transpose()*b2, expectedG.vector(keys)));
   VectorValues actualG = lf.gradientAtZero();
   EXPECT(assert_equal(expectedG, actualG));
+
+  // Test the gradient at zero, scaled by a dual vector
+  Vector dual = (Vector(2) << 3.0, 5.0);
+  VectorValues actualGscaled = lf.gradientAtZero(dual);
+  VectorValues expectedGscaled;
+  expectedGscaled.insert(1, (Vector(2) << 60, -50));
+  expectedGscaled.insert(2, (Vector(2) << -60, 50));
+  EXPECT(assert_equal(expectedGscaled, actualGscaled));
 }
 
 /* ************************************************************************* */
diff --git a/tests/testNonlinearOptimizer.cpp b/tests/testNonlinearOptimizer.cpp
index 6d8a056fc..7fbee3a5b 100644
--- a/tests/testNonlinearOptimizer.cpp
+++ b/tests/testNonlinearOptimizer.cpp
@@ -273,11 +273,11 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) {
 
   // test convergence
   Values actual = optimizer.optimize();
-  EXPECT(assert_equal(expected, actual));
+  EXPECT(assert_equal(expected, actual, 1e-6));
 
   // Check that the gradient is zero
   GaussianFactorGraph::shared_ptr linear = optimizer.linearize();
-  EXPECT(assert_equal(expectedGradient,linear->gradientAtZero()));
+  EXPECT(assert_equal(expectedGradient, linear->gradientAtZero(), 1e-7));
   }
 
   EXPECT(assert_equal(expected, DoglegOptimizer(fg, init).optimize()));
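The relaxed tolerances above acknowledge that optimize() reaches the optimum
only to finite precision. A hypothetical helper in the same spirit; the name
convergedAtOptimum and its default tolerance are illustrative, not part of
GTSAM or this patch.

    #include <gtsam/nonlinear/NonlinearFactorGraph.h>
    #include <gtsam/nonlinear/Values.h>
    #include <gtsam/linear/GaussianFactorGraph.h>

    using namespace gtsam;

    // Declare convergence only when the gradient at the solution is
    // numerically zero, mirroring the checks in the test above
    bool convergedAtOptimum(const NonlinearFactorGraph& fg,
                            const Values& solution, double tol = 1e-7) {
      GaussianFactorGraph::shared_ptr linear = fg.linearize(solution);
      VectorValues g = linear->gradientAtZero();
      return g.vector().lpNorm<Eigen::Infinity>() < tol;
    }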